Initial import

2023-05-29 10:41:03 +02:00 · 2023-05-29 10:41:03 +02:00 · 56a50eead3
commit 56a50eead3
820 changed files with 192077 additions and 0 deletions
--- a/libLSS/CMakeLists.txt
+++ b/libLSS/CMakeLists.txt
@ -0,0 +1,121 @@
+# Include roots used by libLSS: the top-level build tree (it holds the
+# generated libLSS/cconfig.h) and the r3d source directory.
+set(ARES_INCLUDE_PATH ${CMAKE_BINARY_DIR} ${r3d_SOURCE_DIR})
+
+# Instantiate the version translation unit from its template.
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/ares_version.cpp.in
+  ${CMAKE_CURRENT_BINARY_DIR}/ares_version.cpp
+)
+
+# Clear the list of extra link items handed to target_link_libraries(LSS ...).
+set(ares_LINK)
+
+# cconfig.h is the header seen by the compiler; cconfig_new.h is the scratch
+# copy rebuilt on each configure and swapped in only when its content differs.
+set(LIBLSS_CONFIG_FILE_NAME ${CMAKE_BINARY_DIR}/libLSS/cconfig.h)
+set(LIBLSS_CONFIG_NEW_FILE_NAME ${CMAKE_BINARY_DIR}/libLSS/cconfig_new.h)
+
+# CHECK_PRETTY()
+#
+# Probes the C++ compiler for __PRETTY_FUNCTION__ and (re)creates
+# ${LIBLSS_CONFIG_NEW_FILE_NAME} with the result of the probe followed by the
+# Boost-related defines and the path to the test reference data.
+#
+# Takes no arguments. Reads LIBLSS_CONFIG_NEW_FILE_NAME and
+# STACKTRACE_USE_BACKTRACE from the calling scope.
+function(CHECK_PRETTY)
+  set(_check_dir ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/check_pretty.dir)
+  file(MAKE_DIRECTORY ${_check_dir})
+  file(WRITE ${_check_dir}/check.cpp "int main() { const char *s = __PRETTY_FUNCTION__; return 0;}")
+  try_compile(RESULT_CHECK_FUNCTION ${_check_dir} SOURCES ${_check_dir}/check.cpp)
+
+  if (RESULT_CHECK_FUNCTION)
+    set(_DEF_PRETTY "#define __LIBLSS_PRETTY_FUNCTION_AVAILABLE 1\n")
+  else()
+    set(_DEF_PRETTY "#undef __LIBLSS_PRETTY_FUNCTION_AVAILABLE\n")
+  endif()
+
+  # WRITE, not APPEND: a scratch file left behind by a configure run that
+  # aborted before the final rename must be discarded, otherwise every define
+  # below would be duplicated in the resulting header.
+  file(WRITE ${LIBLSS_CONFIG_NEW_FILE_NAME} "${_DEF_PRETTY}")
+  file(APPEND ${LIBLSS_CONFIG_NEW_FILE_NAME} "#define BOOST_BIND_GLOBAL_PLACEHOLDERS 1\n")
+  if (STACKTRACE_USE_BACKTRACE)
+    file(APPEND ${LIBLSS_CONFIG_NEW_FILE_NAME} "#define BOOST_STACKTRACE_USE_BACKTRACE 1\n")
+  endif()
+  file(APPEND ${LIBLSS_CONFIG_NEW_FILE_NAME} "#define BOOST_DISABLE_PRAGMA_MESSAGE 1\n")
+  if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+    file(APPEND ${LIBLSS_CONFIG_NEW_FILE_NAME} "#define BOOST_STACKTRACE_GNU_SOURCE_NOT_REQUIRED 1\n")
+  endif()
+  file(APPEND ${LIBLSS_CONFIG_NEW_FILE_NAME} "#define __LIBLSS_TEST_REFERENCE_PATH \"${CMAKE_SOURCE_DIR}/libLSS/tests/data/reference_data.h5\"\n")
+endfunction()
+
+# Generate the scratch configuration header.
+check_pretty()
+
+
+# Select the MPI communication source: real_mpi when ENABLE_MPI is set,
+# fake_mpi otherwise.
+if(ENABLE_MPI)
+  set(EXTRA_LIBLSS mpi/real_mpi/mpi_communication.cpp)
+else()
+  set(EXTRA_LIBLSS mpi/fake_mpi/mpi_communication.cpp)
+endif()
+
+include(ares_module)
+
+# Sources of the core ARES samplers, always compiled into LSS.
+set(ARES
+  samplers/ares/gibbs_messenger.cpp
+  samplers/ares/powerspectrum_a_sampler.cpp
+  samplers/ares/powerspectrum_b_sampler.cpp
+  samplers/ares/powerspectrum_c_sampler.cpp
+  samplers/ares/linbias_sampler.cpp
+  samplers/ares/synthetic_selection.cpp
+)
+
+# Register every module and collect one "#define ARES_SUPPORT_<module> 1"
+# line per module whose BUILD_ARES_MODULE_<module> flag is true.
+set(MODULE_BUILT "")
+foreach(module IN LISTS ARES_MODULES)
+  add_liblss_module(${module})
+  if(BUILD_ARES_MODULE_${module})
+    set(MODULE_BUILT "${MODULE_BUILT}\n#define ARES_SUPPORT_${module} 1")
+  endif()
+endforeach()
+file(WRITE ${CMAKE_BINARY_DIR}/libLSS/samplers/ares/ares_sampler_option.hpp "${MODULE_BUILT}")
+
+# Publish the include roots to the parent directory scope.
+set(ARES_INCLUDE_PATH ${ARES_INCLUDE_PATH} PARENT_SCOPE)
+
+include_directories(${ARES_INCLUDE_PATH} ${HDF5_INCLUDE_DIR} ${EIGEN_INCLUDE_DIRS})
+
+# The LSS library: core sources, the selected MPI backend (EXTRA_LIBLSS), the
+# ARES samplers (ARES) and the r3d C source.
+add_library(LSS
+    ${CMAKE_CURRENT_BINARY_DIR}/ares_version.cpp
+    physics/cosmo.cpp
+    physics/class_cosmo.cpp
+    tools/static_init.cpp
+    tools/log_traits.cpp
+    tools/console.cpp
+    tools/sigcatcher.cpp
+    mcmc/state_element.cpp
+    samplers/core/main_loop.cpp
+    samplers/core/gig_sampler.cc
+    samplers/core/powerspec_tools.cpp
+    tools/fftw_allocator.cpp
+    tools/hdf5_error.cpp
+    tools/gsl_error.cpp
+    tools/memusage.cpp
+    tools/string_tools.cpp
+    tools/domains.cpp
+    ${EXTRA_LIBLSS}
+    ${ARES}
+    ${r3d_SOURCE_DIR}/r3d.c
+)
+# Kept in the keyword-less form: other parts of the build may add link items
+# to LSS with the same signature, and the two signatures cannot be mixed.
+target_link_libraries(LSS ${ares_LINK} ${BOOST_LIBRARIES} ${LIBCLASS_PATH} ${BACKTRACE_LIBRARY})
+if (ENABLE_FULL_WARNINGS)
+  target_compile_options(LSS PRIVATE
+    $<$<CXX_COMPILER_ID:MSVC>:/W4>
+    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall>
+  )
+endif()
+# Position-independent code is only requested when the Python extension is built.
+set_property(TARGET LSS PROPERTY POSITION_INDEPENDENT_CODE ${BUILD_PYTHON_EXTENSION})
+
+cmessage(WARNING "ARES deps are : ${ares_DEPS}")
+# add_dependencies() rejects a call that names no dependency, so skip it when
+# the list is empty instead of failing the configure step.
+if (NOT "${ares_DEPS}" STREQUAL "")
+  add_dependencies(LSS ${ares_DEPS})
+endif()
+
+# Install the freshly generated configuration header. When a previous header
+# exists and has the same SHA256, it is left untouched (the scratch copy is
+# deleted) so that its timestamp does not change; otherwise the scratch copy
+# replaces it.
+if(NOT EXISTS ${LIBLSS_CONFIG_FILE_NAME})
+  file(RENAME ${LIBLSS_CONFIG_NEW_FILE_NAME} ${LIBLSS_CONFIG_FILE_NAME})
+else()
+  file(SHA256 ${LIBLSS_CONFIG_NEW_FILE_NAME} _CONFIG_HASH_NEW)
+  file(SHA256 ${LIBLSS_CONFIG_FILE_NAME} _CONFIG_HASH)
+
+  if("${_CONFIG_HASH}" STREQUAL "${_CONFIG_HASH_NEW}")
+    file(REMOVE ${LIBLSS_CONFIG_NEW_FILE_NAME})
+  else()
+    file(REMOVE ${LIBLSS_CONFIG_FILE_NAME})
+    file(RENAME ${LIBLSS_CONFIG_NEW_FILE_NAME} ${LIBLSS_CONFIG_FILE_NAME})
+  endif()
+endif()
+
+# Capture the state of the git modules at configure time and turn it into
+# git_state.cpp, which ares_version.cpp includes as the value of
+# ARES_GIT_REPORT. Failures are reported but do not abort the configure step.
+execute_process(COMMAND bash ${CMAKE_SOURCE_DIR}/get-aquila-modules.sh --report -q
+                OUTPUT_VARIABLE _GIT_STATE
+                RESULT_VARIABLE _GIT_STATE_RESULT
+                WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+)
+if (NOT "${_GIT_STATE_RESULT}" STREQUAL "0")
+  message(WARNING "get-aquila-modules.sh --report failed (${_GIT_STATE_RESULT}); the embedded git report may be empty or incomplete")
+endif()
+file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git_report.txt "${_GIT_STATE}")
+execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/build_tools/gen_code_in_header.py
+        ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git_report.txt ${CMAKE_CURRENT_BINARY_DIR}/git_state.cpp
+        RESULT_VARIABLE _GIT_STATE_GEN_RESULT
+)
+if (NOT "${_GIT_STATE_GEN_RESULT}" STREQUAL "0")
+  message(WARNING "gen_code_in_header.py failed (${_GIT_STATE_GEN_RESULT}); git_state.cpp may be missing or stale")
+endif()
+
+# subdirs() is deprecated; add_subdirectory() is its replacement.
+add_subdirectory(tests)
--- a/libLSS/ares_version.cpp.in
+++ b/libLSS/ares_version.cpp.in
@ -0,0 +1,7 @@
+#include "libLSS/ares_version.hpp"
+
+// Template processed by configure_file(): the GIT_VER and ARES_MODULES
+// placeholders below are replaced at configure time.
+const std::string LibLSS::ARES_GIT_VERSION = "@GIT_VER@"; 
+const std::string LibLSS::ARES_BUILTIN_MODULES = "@ARES_MODULES@"; 
+// The initializer is pulled in from git_state.cpp, which the build generates
+// with gen_code_in_header.py from the git report text (presumably as a
+// string-literal expression -- verify against the generator).
+const std::string LibLSS::ARES_GIT_REPORT =
+#include "libLSS/git_state.cpp"
+;
--- a/libLSS/ares_version.hpp
+++ b/libLSS/ares_version.hpp
@ -0,0 +1,27 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/ares_version.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_ARES_VERSION_HPP
+#define __LIBLSS_ARES_VERSION_HPP
+
+#include <string>
+
+// Build-identification strings. They are only declared here; the definitions
+// live in the ares_version.cpp produced from ares_version.cpp.in.
+namespace LibLSS {
+
+  /// This string holds the GIT version of the ARES root module.
+  extern const std::string ARES_GIT_VERSION;
+
+  /// Holds a semi-colon separated list of the modules that were compiled in.
+  extern const std::string ARES_BUILTIN_MODULES;
+
+  /// Extensive git report on the different git versions used in the final binary.
+  extern const std::string ARES_GIT_REPORT;
+} // namespace LibLSS
+
+#endif
--- a/libLSS/data/angtools.hpp
+++ b/libLSS/data/angtools.hpp
@ -0,0 +1,55 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/angtools.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_ANGTOOLS_HPP
+#define __LIBLSS_ANGTOOLS_HPP
+
+#include <cmath>
+
+namespace LibLSS {
+
+  /**
+   * Convert two angles into a unit Cartesian vector.
+   *
+   * Writes (cos(ra) cos(dec), sin(ra) cos(dec), sin(dec)) into xyz[0..2].
+   * Both angles are handed directly to std::cos / std::sin.
+   */
+  template <typename T, typename Array>
+  void ang2vec(T ra, T dec, Array &xyz) {
+    T cos_ra = std::cos(ra);
+    T sin_ra = std::sin(ra);
+    T cos_dec = std::cos(dec);
+    T sin_dec = std::sin(dec);
+
+    xyz[0] = cos_ra * cos_dec;
+    xyz[1] = sin_ra * cos_dec;
+    xyz[2] = sin_dec;
+  }
+
+  /**
+   * Recover the two angles of a Cartesian vector.
+   *
+   * ra is atan2(y, x); dec is asin(z / |xyz|), or 0 when the norm is not
+   * strictly positive.
+   */
+  template <typename T, typename Array>
+  void vec2ang(Array xyz, T &ra, T &dec) {
+    T norm = std::sqrt(xyz[0] * xyz[0] + xyz[1] * xyz[1] + xyz[2] * xyz[2]);
+
+    ra = std::atan2(xyz[1], xyz[0]);
+
+    if (norm > 0) {
+      dec = std::asin(xyz[2] / norm);
+    } else {
+      dec = 0.;
+    }
+  }
+
+  /**
+   * Same as the three-argument overload, and additionally returns the norm
+   * of the vector in r.
+   */
+  template <typename T, typename Array>
+  void vec2ang(Array xyz, T &ra, T &dec, T &r) {
+    // The norm is stored first; the declination below is derived from it.
+    r = std::sqrt(xyz[0] * xyz[0] + xyz[1] * xyz[1] + xyz[2] * xyz[2]);
+    ra = std::atan2(xyz[1], xyz[0]);
+    dec = 0.;
+
+    if (r > 0) {
+      dec = std::asin(xyz[2] / r);
+    }
+  }
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/data/base.hpp
+++ b/libLSS/data/base.hpp
@ -0,0 +1,23 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/base.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __ARES2_BASE_HPP
+#define __ARES2_BASE_HPP
+
+namespace LibLSS
+{
+
+    // Empty class: it declares no members and no virtual functions here.
+    // NOTE(review): presumably meant as a common base for data containers --
+    // confirm against the classes that derive from it.
+    class Base_Data {
+    public:
+    
+    };
+
+};
+
+#endif
--- a/libLSS/data/galaxies.hpp
+++ b/libLSS/data/galaxies.hpp
@ -0,0 +1,97 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/galaxies.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_GALAXIES_HPP
+#define __LIBLSS_GALAXIES_HPP
+
+
+#include <CosmoTool/hdf5_array.hpp>
+
+namespace LibLSS {
+
+    // Plain record describing one catalogue object. The same layout is
+    // registered as an HDF5 compound type by CTOOL_STRUCT_TYPE further down
+    // in this header.
+    struct BaseGalaxyDescriptor {
+        unsigned long long id;
+        // Angular coordinates; the projection code passes them to
+        // ang2vec(phi, theta, ...) as (ra, dec).
+        double phi, theta;
+        double zo;
+        double m;
+        double M_abs;
+        double Mgal;
+        double z;
+        // Distance: the projection code scales the unit vector by r.
+        double r;
+        // Weights: haloSimToGridGeneric deposits w, galaxySurveyToGridGeneric
+        // uses final_w.
+        double w;
+        double final_w;
+
+        double radius;
+        double spin;
+        // Cartesian position, read by haloSimToGridGeneric through loadPosition.
+        double posx, posy ,posz;
+
+        double vx, vy, vz;
+    };
+
+    // BaseGalaxyDescriptor extended with two extra fields.
+    struct PhotoGalaxyDescriptor {
+        BaseGalaxyDescriptor base;
+        double sigma_z0;
+        int gridid;
+    };
+
+
+    // Identifiers of the available selection schemes for galaxies and halos.
+    // The enumerators are also registered for HDF5 by CTOOL_ENUM_TYPE below,
+    // in the same order.
+    enum GalaxySelectionType {
+      GALAXY_SELECTION_FILE,
+      GALAXY_SELECTION_SCHECHTER,
+      GALAXY_SELECTION_PIECEWISE,
+      HALO_SELECTION_NONE,
+      HALO_SELECTION_MASS,
+      HALO_SELECTION_RADIUS,
+      HALO_SELECTION_SPIN,
+      HALO_SELECTION_MIXED
+    };
+};
+
+  // Type registrations through the CosmoTool macros (from
+  // CosmoTool/hdf5_array.hpp). They are placed outside namespace LibLSS and
+  // therefore use fully qualified names.
+  CTOOL_ENUM_TYPE(LibLSS::GalaxySelectionType, HDF5T_GalaxySelectionType,
+    (LibLSS::GALAXY_SELECTION_FILE)
+    (LibLSS::GALAXY_SELECTION_SCHECHTER)
+    (LibLSS::GALAXY_SELECTION_PIECEWISE)
+    (LibLSS::HALO_SELECTION_NONE)
+    (LibLSS::HALO_SELECTION_MASS)
+    (LibLSS::HALO_SELECTION_RADIUS)
+    (LibLSS::HALO_SELECTION_SPIN)
+    (LibLSS::HALO_SELECTION_MIXED)
+  );
+
+  /* HDF5 complex type */
+  // Every field of BaseGalaxyDescriptor is listed, in an order that differs
+  // from the struct declaration.
+  // NOTE(review): presumably the macro records each member by name/offset so
+  // the order here is free -- confirm in CosmoTool before reordering.
+  CTOOL_STRUCT_TYPE(LibLSS::BaseGalaxyDescriptor, HDF5T_BaseGalaxyDescriptor,
+    ((unsigned long long, id))
+    ((double, phi))
+    ((double, theta))
+    ((double, posx))
+    ((double, posy))
+    ((double, posz))
+	((double, radius))
+    ((double, spin))
+    ((double, zo))
+    ((double, m))
+    ((double, M_abs))
+    ((double, Mgal))
+    ((double, z))
+    ((double, vx))
+    ((double, vy))
+    ((double, vz))
+    ((double, r))
+    ((double, w))
+    ((double, final_w))
+  );
+
+  // The nested `base` member reuses the LibLSS::BaseGalaxyDescriptor
+  // registration above.
+  CTOOL_STRUCT_TYPE(LibLSS::PhotoGalaxyDescriptor, HDF5T_PhotoGalaxyDescriptor,
+    ((LibLSS::BaseGalaxyDescriptor, base))
+    ((double, sigma_z0))
+    ((int, gridid))
+  );
+
+
+#endif
--- a/libLSS/data/integer_window3d.hpp
+++ b/libLSS/data/integer_window3d.hpp
@ -0,0 +1,132 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/integer_window3d.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_MAJORITY_VOTE_WINDOW_3D_HPP
+#define __LIBLSS_MAJORITY_VOTE_WINDOW_3D_HPP
+
+#include <cassert>
+#include <functional>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/openmp.hpp"
+#include <CosmoTool/algo.hpp>
+#include <boost/array.hpp>
+#include <numeric>
+#include <cmath>
+
+namespace LibLSS {
+
+  namespace internalIntegerWindow {
+
+    /**
+     * Map a position to the integer label used for the majority vote.
+     *
+     * The label is the sum of the sky completeness in the direction of x and
+     * of the radial selection at distance |x|, converted to unsigned int by
+     * the return statement.
+     */
+    template <typename SelFunction3d>
+    unsigned int selectionValue(
+        std::array<double, 3> const &x, SelFunction3d const &selfunc) {
+      double const norm = std::sqrt(x[0] * x[0] + x[1] * x[1] + x[2] * x[2]);
+
+      auto const skyPart =
+          selfunc.get_sky_completeness(x[0] / norm, x[1] / norm, x[2] / norm);
+      auto const radialPart = selfunc.getRadialSelection(norm, 0);
+
+      // *WARNING:* the two contributions are summed, not multiplied.
+      return skyPart + radialPart;
+    }
+  } // namespace internalIntegerWindow
+
+  /**
+   * Fill an integer window by Monte-Carlo majority vote.
+   *
+   * For every voxel of the local slab of `selfunc`, `numCalls` points are
+   * drawn inside the voxel (centre -/+ half a cell along each axis), each
+   * point is turned into an integer by internalIntegerWindow::selectionValue,
+   * and the most frequent integer is stored in the voxel. On a tie the
+   * smallest value wins, because the tally is an ordered map scanned with
+   * std::max_element.
+   *
+   * @param comm        not used by this function.
+   * @param rng         random source; only rng.uniform() is called, from
+   *                    inside the OpenMP parallel region (assumes it returns
+   *                    a fraction of the cell and may be called concurrently
+   *                    -- TODO confirm).
+   * @param selFuncData selection function handed to selectionValue.
+   * @param selfunc     output array; its first-axis index base and extent
+   *                    define the local slab.
+   * @param L           box lengths; only written to the debug log.
+   * @param d           voxel size along each axis.
+   * @param xmin        position of the centre of voxel (0,0,0).
+   * @param N           grid size; N[1] and N[2] give the extent of the two
+   *                    last axes.
+   * @param numCalls    number of samples per voxel; must be positive.
+   */
+  template <
+      typename RandomNum, typename IntegerWindow, typename SelFunction3d,
+      typename Dimension, typename IDimension>
+  void computeMajorityVoteWindow3d(
+      MPI_Communication *comm, RandomNum &rng, SelFunction3d const &selFuncData,
+      IntegerWindow &selfunc, const Dimension &L, const Dimension &d,
+      const Dimension &xmin, const IDimension &N, size_t numCalls = 6000) {
+    LIBLSS_AUTO_CONTEXT2(LOG_INFO, ctx, "computeMajorityVoteWindow3d");
+
+    // Without samples the tally below stays empty and there is no majority
+    // to pick.
+    assert(numCalls > 0);
+
+    // One progress counter per thread.
+    boost::multi_array<int, 1> count_elements(
+        boost::extents[LibLSS::smp_get_max_threads()]);
+    size_t startN0 = selfunc.index_bases()[0];
+    size_t localN0 = selfunc.shape()[0], N1 = N[1], N2 = N[2];
+    double d0 = d[0];
+    double d1 = d[1];
+    double d2 = d[2];
+    double xmin0 = xmin[0];
+    double xmin1 = xmin[1];
+    double xmin2 = xmin[2];
+
+    auto &p = Console::instance().start_progress<LOG_STD>(
+        "3D Integer Window", localN0 * N1 * N2, 2);
+
+    ctx.format("Use %d calls integral / calls", numCalls);
+
+    std::fill(count_elements.begin(), count_elements.end(), 0);
+
+    // Flattened index range of the voxels owned by this process.
+    long job_start = startN0 * N1 * N2;
+    long job_end = (startN0 + localN0) * N1 * N2;
+
+    ctx.format2<LOG_DEBUG>(
+        "Window computation, MPI job_start=%ld job_end=%ld", job_start,
+        job_end);
+    ctx.format2<LOG_DEBUG>(
+        "d=[%g,%g,%g], L=[%g,%g,%g]", d[0], d[1], d[2], L[0], L[1], L[2]);
+
+#pragma omp parallel
+    {
+      // Per-thread tally: selection value -> number of samples that hit it.
+      std::map<unsigned int, unsigned int> hitCount;
+#pragma omp for schedule(dynamic, 100)
+      for (size_t i = job_start; i < job_end; i++) {
+        ///get 3d indices
+        size_t ii = (size_t)(i / N1 / N2);
+        size_t jj = (size_t)(i / N2 - ii * N1);
+        size_t kk = (size_t)(i - jj * N2 - ii * N2 * N1);
+
+        double x = double(ii) * d0 + xmin0, y = double(jj) * d1 + xmin1,
+               z = double(kk) * d2 + xmin2;
+        std::array<double, 3> xl{x - 0.5 * d0, y - 0.5 * d1, z - 0.5 * d2}; // half voxel shift is for NGP in projection
+        std::array<double, 3> xu{x + 0.5 * d0, y + 0.5 * d1, z + 0.5 * d2};
+
+        hitCount.clear();
+        for (size_t c = 0; c < numCalls; c++) {
+          std::array<double, 3> sample;
+          for (unsigned int j = 0; j < 3; j++)
+            sample[j] = xl[j] + (xu[j] - xl[j]) * rng.uniform();
+
+          hitCount[internalIntegerWindow::selectionValue(sample, selFuncData)]++;
+        }
+
+        // Find majority vote
+        selfunc[ii][jj][kk] = std::max_element(
+                                  hitCount.begin(), hitCount.end(),
+                                  [](auto const &a, auto const &b) {
+                                    return a.second < b.second;
+                                  })
+                                  ->first;
+
+        assert(LibLSS::smp_get_thread_id() < LibLSS::smp_get_max_threads());
+        count_elements[LibLSS::smp_get_thread_id()]++;
+        // Only thread 0 reports progress, summing the per-thread counters.
+        if (LibLSS::smp_get_thread_id() == 0) {
+          int done =
+              std::accumulate(count_elements.begin(), count_elements.end(), 0);
+          p.update(done);
+        }
+      }
+    }
+    p.destroy();
+  }
+}; // namespace LibLSS
+
+#endif
--- a/libLSS/data/linear_selection.hpp
+++ b/libLSS/data/linear_selection.hpp
@ -0,0 +1,213 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/linear_selection.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_DATA_LINEAR_SELECTION_HPP
+#define __LIBLSS_DATA_LINEAR_SELECTION_HPP
+
+#include <boost/format.hpp>
+#include <boost/algorithm/string/trim.hpp>
+#include <boost/lexical_cast.hpp>
+#include <H5Cpp.h>
+#include <fstream>
+#include <sstream>
+#include <cmath>
+#include <cfloat>
+#include <healpix_cxx/pointing.h>
+#include <healpix_cxx/healpix_map.h>
+#include <healpix_cxx/healpix_map_fitsio.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/hdf5_type.hpp"
+
+namespace LibLSS {
+
+  class LinearInterpolatedSelection {
+  protected:
+    boost::multi_array<double, 1> selection;
+    double dr, rmin, dmin, dmax;
+    Healpix_Map<double> sky;
+
+  public:
+    /// Build a selection with an empty radial table, rmin = 0, dr = 1,
+    /// dmin = dmax = 0 and an Nside=1 sky map in RING ordering.
+    ///
+    /// Members are initialized in their declaration order. The radial table
+    /// is default-constructed and therefore holds no element to fill.
+    LinearInterpolatedSelection()
+        : dr(1), rmin(0), dmin(0), dmax(0), sky(1, RING, SET_NSIDE) {}
+    ~LinearInterpolatedSelection() {}
+
+    /// Read the sky map from the FITS file `fname`, then zero every pixel
+    /// whose value is strictly below `threshold`.
+    void loadSky(const std::string &fname, double threshold = 0) {
+      read_Healpix_map_from_fits(fname, sky);
+
+      for (long pix = 0; pix < sky.Npix(); ++pix) {
+        if (sky[pix] < threshold) {
+          sky[pix] = 0;
+        }
+      }
+    }
+
+    /// Set every pixel of the sky map to `v`.
+    void fillSky(double v) {
+      sky.fill(v);
+    }
+
+    /// Reset the sky map to Nside=1, RING ordering.
+    void clearSky() {
+      sky.SetNside(1, RING);
+    }
+
+    /// Set the distance interval outside of which getRadialSelection
+    /// returns 0.
+    void setMinMaxDistances(double dmin, double dmax) {
+      this->dmax = dmax;
+      this->dmin = dmin;
+    }
+
+    /// Load the radial selection table from the text file `fname`.
+    ///
+    /// Format: lines starting with '#' are comments. The first non-comment
+    /// line holds "rmin dr numPoints"; it is followed by numPoints values,
+    /// one per line. Comment lines between values are skipped and do not use
+    /// up a table entry. On success dmax is set to rmin + 2 * dr * numPoints.
+    ///
+    /// Errors (unreadable file, missing or malformed header, missing line,
+    /// unparsable value) are reported through error_helper<ErrorIO>.
+    void loadRadial(const std::string &fname) {
+      using namespace std;
+      using boost::format;
+      using boost::str;
+
+      ifstream f(fname.c_str());
+      string line;
+      // 1-based number of the last line read, used in error messages.
+      long lineNumber = 0;
+
+      if (!f) {
+        error_helper<ErrorIO>(
+            str(format("Failed to open '%s' to load radial") % fname));
+      }
+
+      {
+        int numPoints = 0;
+
+        while (getline(f, line)) {
+          lineNumber++;
+          if (line[0] != '#')
+            break;
+        }
+        if (!f)
+          error_helper<ErrorIO>("Error finding the first line");
+
+        istringstream iss(line);
+
+        // A header that does not parse would otherwise size the table from
+        // a meaningless point count.
+        if (!(iss >> rmin >> dr >> numPoints) || numPoints < 0)
+          error_helper<ErrorIO>(
+              str(format("Invalid radial selection header on line %d") %
+                  lineNumber));
+
+        selection.resize(boost::extents[numPoints]);
+        Console::instance().print<LOG_INFO>(
+            boost::format(
+                "Found selection with %d points from %g Mpc/h to %g Mpc/h") %
+            numPoints % rmin % (rmin + dr * numPoints));
+        this->dmax = rmin + dr * numPoints * 2;
+      }
+
+      long const numValues = selection.shape()[0];
+      for (long i = 0; i < numValues;) {
+        lineNumber++;
+        if (!getline(f, line))
+          error_helper<ErrorIO>(
+              str(format("Error reading line %d") % lineNumber));
+        // A comment must not advance the table index, otherwise the entry
+        // would stay unset and the last value of the file would be dropped.
+        if (line[0] == '#')
+          continue;
+        try {
+          boost::algorithm::trim(line);
+          selection[i] = boost::lexical_cast<double>(line);
+        } catch (const std::exception &e) {
+          error_helper<ErrorIO>(
+              str(format("Bad value cast on line %d") % lineNumber));
+        }
+        i++;
+      }
+    }
+
+    /// Install `a` as the radial table, starting at rmin = 0 with a step of
+    /// rmax / a.num_elements().
+    void setArray(const boost::multi_array<double, 1> &a, double rmax) {
+      rmin = 0;
+      dr = rmax / a.num_elements();
+
+      selection.resize(boost::extents[a.num_elements()]);
+      selection = a;
+    }
+
+    /// Linearly interpolate the radial table at distance `r`.
+    ///
+    /// Returns 0 when `r` lies outside [dmin, dmax] or when the interpolation
+    /// would need an entry outside the table. The argument `n` is not used.
+    double getRadialSelection(double r, int n) const {
+      double const pos = (r - rmin) / dr;
+      double const cell = std::floor(pos);
+      int const i = int(cell);
+
+      //Console::instance().c_assert(r < rmax, "Box too large for radial selection table");
+      bool const outsideTable = (i + 1) >= selection.shape()[0] || i < 0;
+      bool const outsideRange = r < dmin || r > dmax;
+      if (outsideTable || outsideRange)
+        return 0;
+
+      // Fractional position between entries i and i + 1.
+      double const f = pos - cell;
+      return (1 - f) * selection[i] + f * selection[i + 1];
+    }
+
+    /// Always 1.
+    int getNumRadial() const {
+      return 1;
+    }
+
+    /// Sky map value of the pixel hit by the direction (x, y, z). The vector
+    /// is normalized first, with the norm clamped from below by DBL_EPSILON.
+    double get_sky_completeness(double x, double y, double z) const {
+      double const norm = std::max(std::sqrt(x * x + y * y + z * z), DBL_EPSILON);
+      vec3 const direction(x / norm, y / norm, z / norm);
+
+      return sky[sky.vec2pix(direction)];
+    }
+
+    /// Write the selection into the HDF5 group `fg` as four datasets:
+    /// "completeness" (the sky map pixels), "dr", "rmin" (one element each)
+    /// and "radial_selection" (the radial table).
+    /// dmin and dmax are not written.
+    void saveFunction(H5_CommonFileGroup &fg) {
+      CosmoTool::get_hdf5_data_type<double> ht;
+      hsize_t Npix = sky.Npix();
+      {
+        // One-dimensional dataset with one entry per sky pixel.
+        H5::DataSpace dataspace(1, &Npix);
+        H5::DataSet dataset =
+            fg.createDataSet("completeness", ht.type(), dataspace);
+        dataset.write(&sky[0], ht.type());
+      }
+
+      {
+        // dr and rmin share the same single-element dataspace.
+        hsize_t s = 1;
+        H5::DataSpace dataspace(1, &s);
+        H5::DataSet dataset = fg.createDataSet("dr", ht.type(), dataspace);
+        dataset.write(&dr, ht.type());
+
+        H5::DataSet dataset2 = fg.createDataSet("rmin", ht.type(), dataspace);
+        dataset2.write(&rmin, ht.type());
+      }
+
+      CosmoTool::hdf5_write_array(fg, "radial_selection", selection);
+    }
+
+    /// Read back from the HDF5 group `fg` the datasets "completeness",
+    /// "rmin", "dr" and "radial_selection", in that order.
+    ///
+    /// A dataset with an unexpected rank or size is reported through
+    /// error_helper<ErrorIO>. dmin and dmax are left untouched.
+    void loadFunction(H5_CommonFileGroup &fg) {
+      CosmoTool::get_hdf5_data_type<double> ht;
+
+      // Reads a one-dimensional, single-element dataset into `target`.
+      auto readScalar = [&fg, &ht](char const *name, double &target) {
+        std::string const failure = std::string("Invalid stored ") + name;
+        H5::DataSet dataset = fg.openDataSet(name);
+        H5::DataSpace dataspace = dataset.getSpace();
+        hsize_t n;
+
+        if (dataspace.getSimpleExtentNdims() != 1)
+          error_helper<ErrorIO>(failure);
+
+        dataspace.getSimpleExtentDims(&n);
+        if (n != 1)
+          error_helper<ErrorIO>(failure);
+
+        dataset.read(&target, ht.type());
+      };
+
+      {
+        hsize_t Npix;
+        H5::DataSet dataset = fg.openDataSet("completeness");
+        H5::DataSpace dataspace = dataset.getSpace();
+
+        if (dataspace.getSimpleExtentNdims() != 1) {
+          error_helper<ErrorIO>("Invalid stored array");
+        }
+
+        // The pixel count fixes the resolution of the sky map.
+        dataspace.getSimpleExtentDims(&Npix);
+        sky.SetNside(sky.npix2nside(Npix), RING);
+        dataset.read(&sky[0], ht.type());
+      }
+
+      readScalar("rmin", rmin);
+      readScalar("dr", dr);
+
+      CosmoTool::hdf5_read_array(fg, "radial_selection", selection);
+    }
+  };
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/data/postools.hpp
+++ b/libLSS/data/postools.hpp
@ -0,0 +1,37 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/postools.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_POSTOOLS_HPP
+#define __LIBLSS_POSTOOLS_HPP
+
+#include <cmath>
+
+namespace LibLSS {
+
+  /// Store the position (x, y, z) into xyz[0..2].
+  template <typename T, typename Array>
+  void loadPosition(T x, T y, T z, Array &xyz) {
+    xyz[0] = x, xyz[1] = y, xyz[2] = z;
+  }
+
+  /// Store the velocity (vx, vy, vz) into vxyz[0..2].
+  template <typename T, typename Array>
+  void loadVelocity(T vx, T vy, T vz, Array &vxyz) {
+    vxyz[0] = vx, vxyz[1] = vy, vxyz[2] = vz;
+  }
+
+  // Placeholder kept from the original sources:
+  //template<typename T, typename Array>
+  //void ComputeRedshiftSpacePosition(Array& xyz, Array& vxyz) {
+  //}
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/data/projection.hpp
+++ b/libLSS/data/projection.hpp
@ -0,0 +1,283 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/projection.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_PROJECTION_HPP
+#define __LIBLSS_PROJECTION_HPP
+
+#include <algorithm>
+#include <array>
+#include <boost/array.hpp>
+#include <boost/multi_array.hpp>
+#include <boost/lambda/lambda.hpp>
+#include "angtools.hpp"
+#include "postools.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/physics/generic_cic.hpp"
+
+namespace LibLSS {
+
+  // Available ways of projecting objects onto the grid. Registered for HDF5
+  // by the CTOOL_ENUM_TYPE invocation at the end of this header.
+  enum ProjectionDataModel { NGP_PROJECTION, LUMINOSITY_CIC_PROJECTION };
+
+  // Constants used by haloSimToGridGeneric: number of spatial dimensions,
+  // cells touched per axis (2), per slice (4) and in total (8) when a halo is
+  // spread over its neighbouring cells, and the sum of the two per-axis
+  // weights.
+  static const int LSS_DIMENSIONS = 3;
+  static const int NR_CELLS_DIM = 2;
+  static const int NR_CELLS_SLICE = 4;
+  static const int NR_CELLS_TOTAL = 8;
+  static const double TOTAL_WEIGHT = 1.;
+
+  // Three doubles that can be read either as lengths or as a position (both
+  // names alias the same storage).
+  // Note: the function templates below use `Dimension` as a template
+  // parameter name, which hides this struct inside them.
+  struct Dimension {
+    union {
+      double length[LSS_DIMENSIONS];
+      double position[LSS_DIMENSIONS];
+    };
+  };
+
+  // Number of cells along each axis.
+  struct Grid {
+    size_t resolution[LSS_DIMENSIONS];
+  };
+
+  namespace details {
+    // Predicate that accepts every object; used by the *_all wrappers.
+    template <typename GSurvey>
+    struct ProjectionAcceptAll {
+      bool operator()(const typename GSurvey::GalaxyType &g) { return true; }
+    };
+  }; // namespace details
+
+  /**
+   * Project the galaxies of a survey onto a density grid.
+   *
+   * Every galaxy accepted by `condition` is converted from (phi, theta, r)
+   * to Cartesian coordinates relative to `corner` and stored together with
+   * its `final_w` weight. The collected positions and weights are then handed
+   * to Kernel::projection, which fills `field` (zeroed beforehand).
+   *
+   * @tparam Kernel   projection kernel; Kernel::projection does the deposit.
+   * @tparam Periodic boundary handler, constructed from (N[0], N[1], N[2]).
+   * @param survey    catalogue providing surveySize() and operator[].
+   * @param field     output grid.
+   * @param N         grid size along each axis.
+   * @param corner    position subtracted from every galaxy position.
+   * @param L         box lengths along each axis.
+   * @param d         not used by this function.
+   * @param condition predicate selecting the galaxies to project.
+   * @param prerun    optional hook run before anything else; an empty
+   *                  callable is skipped.
+   * @return number of galaxies accepted by `condition`.
+   */
+  template <
+      typename Kernel, typename Periodic, class GSurvey, typename DensityField,
+      typename Dimension, typename IDimension, typename Condition,
+      typename PreRun = std::function<void()>>
+  size_t galaxySurveyToGridGeneric(
+      const GSurvey &survey, DensityField &field, const IDimension &N,
+      const Dimension &corner, const Dimension &L, const Dimension &d,
+      Condition condition, PreRun prerun = PreRun()) {
+    using boost::format;
+    size_t accepted = 0;
+    // Bounding box of the accepted positions, for the log only.
+    double found_corners[LSS_DIMENSIONS][2];
+    boost::multi_array<double, 2> xyz(boost::extents[survey.surveySize()][3]);
+    boost::multi_array<double, 1> weights(boost::extents[survey.surveySize()]);
+
+    // The hook is optional: it only runs when a callable was supplied.
+    if (prerun)
+      prerun();
+
+    for (int i = 0; i < LSS_DIMENSIONS; i++) {
+      found_corners[i][0] = std::numeric_limits<double>::infinity();
+      found_corners[i][1] = -std::numeric_limits<double>::infinity();
+    }
+
+    array::fill(field, 0);
+
+    for (long i = 0; i < survey.surveySize(); i++) {
+      typename GSurvey::ConstRefGalaxyType g = survey[i];
+      boost::array<double, LSS_DIMENSIONS> loc_xyz;
+
+      if (!condition(g))
+        continue;
+
+      ang2vec(g.phi, g.theta, loc_xyz);
+
+      for (int j = 0; j < LSS_DIMENSIONS; j++) {
+        loc_xyz[j] = loc_xyz[j] * g.r - corner[j];
+        found_corners[j][0] = std::min(loc_xyz[j], found_corners[j][0]);
+        found_corners[j][1] = std::max(loc_xyz[j], found_corners[j][1]);
+      }
+
+      // Accepted galaxies are packed at the front of xyz / weights.
+      std::copy(loc_xyz.begin(), loc_xyz.end(), xyz[accepted].begin());
+      weights[accepted] = g.final_w;
+      accepted++;
+    }
+    Console::instance().format<LOG_VERBOSE>(
+        "Using type %s for projection", typeid(Periodic).name());
+    Kernel::projection(
+        xyz, field, L[0], L[1], L[2], N[0], N[1], N[2],
+        Periodic(N[0], N[1], N[2]), weights, accepted);
+
+    Console::instance().print<LOG_VERBOSE>(
+        format("Project to grid: accepted %d galaxies") % accepted);
+    {
+      std::string cstr;
+
+      for (int j = 0; j < LSS_DIMENSIONS; j++)
+        cstr += str(
+            format("(%lg - %lg) ") % found_corners[j][0] % found_corners[j][1]);
+      Console::instance().print<LOG_VERBOSE>(
+          "Project to grid: found corners " + cstr);
+    }
+
+    return accepted;
+  }
+
+  /// Project every galaxy of `survey` onto `field`: forwards to
+  /// galaxySurveyToGridGeneric with a predicate that accepts everything.
+  template <
+      typename Kernel, typename Periodic, class GSurvey, typename DensityField,
+      typename Dimension, typename IDimension>
+  size_t galaxySurveyToGrid_all(
+      const GSurvey &survey, DensityField &field, const IDimension &N,
+      const Dimension &corner, const Dimension &L, const Dimension &d) {
+    typedef details::ProjectionAcceptAll<GSurvey> AcceptAll;
+
+    return galaxySurveyToGridGeneric<Kernel, Periodic>(
+        survey, field, N, corner, L, d, AcceptAll());
+  }
+
+  /* This function is meant to create a mock survey based on the selection
+     function held in survey_in and the full density field in field.
+     The body is currently empty: calling it does nothing.
+     */
+  template <class GSurvey, typename DensityField, typename Dimension>
+  void createMockSurvey(
+      const GSurvey &survey_in, GSurvey &survey_out, DensityField &field,
+      const Dimension &corner, const Dimension &L) {}
+
+  /**
+   * Deposit the halos of a simulation onto a grid.
+   *
+   * Each halo accepted by `condition` is spread over the 8 cells surrounding
+   * its position (posx, posy, posz); each cell receives h.w multiplied by the
+   * product of the per-axis linear weights. Indices are wrapped with the
+   * global grid size M, and a deposit is only made in the parts that fall
+   * inside the local extent of `field`.
+   *
+   * @param sim       catalogue providing surveySize() and operator[].
+   * @param field     grid receiving the deposits; it is accumulated into and
+   *                  not cleared here.
+   * @param M         global number of cells along each axis.
+   * @param corner    position of the grid origin.
+   * @param L         not used by this function.
+   * @param d         cell size along each axis.
+   * @param condition predicate selecting the halos to deposit.
+   * @return number of cell assignments made (up to 8 per halo), not the
+   *         number of halos.
+   */
+  template <
+      class GSurvey, typename DensityField, typename Grid, typename Dimension,
+      typename Condition>
+  size_t haloSimToGridGeneric(
+      const GSurvey &sim, DensityField &field, const Grid &M,
+      const Dimension &corner, const Dimension &L, const Dimension &d,
+      Condition condition) {
+    // Local extent and index bases of the field.
+    const typename DensityField::size_type *N = field.shape();
+    const typename DensityField::index *base = field.index_bases();
+    using boost::format;
+    using boost::lambda::_1;
+
+    size_t accepted = 0;
+    // Bounding box of the accepted positions, for the log only.
+    double found_corners[LSS_DIMENSIONS][2];
+    for (auto i = 0; i < LSS_DIMENSIONS; i++) {
+      found_corners[i][0] = std::numeric_limits<double>::infinity();
+      found_corners[i][1] = -std::numeric_limits<double>::infinity();
+    }
+
+    for (auto i = 0; i < sim.surveySize(); i++) {
+      typename GSurvey::ConstRefGalaxyType h = sim[i];
+      // ii[c] holds the grid index of neighbour cell c (0..7).
+      std::array<
+          std::array<typename DensityField::index, LSS_DIMENSIONS>,
+          NR_CELLS_TOTAL>
+          ii;
+      std::array<double, LSS_DIMENSIONS> xyz;
+      bool validLowerSlice = true;
+      bool validUpperSlice = true;
+
+      if (!condition(h))
+        continue;
+
+      loadPosition(h.posx, h.posy, h.posz, xyz);
+
+      // Index of the cell holding the position (cell 0 of the 8).
+      for (int j = 0; j < LSS_DIMENSIONS; j++) {
+        ii[0][j] = (int)std::floor((xyz[j] - corner[j]) / d[j]);
+        found_corners[j][0] = std::min(xyz[j], found_corners[j][0]);
+        found_corners[j][1] = std::max(xyz[j], found_corners[j][1]);
+      }
+
+      // wxyz[1][j] is the fractional offset inside the cell along axis j
+      // (weight of the +1 neighbour); wxyz[0][j] is its complement.
+      std::array<double, NR_CELLS_TOTAL> weight;
+      std::array<std::array<double, LSS_DIMENSIONS>, NR_CELLS_DIM> wxyz;
+      for (auto j = 0; j < LSS_DIMENSIONS; j++) {
+        wxyz[1][j] = ((xyz[j] - corner[j]) / d[j]) - ii[0][j];
+        wxyz[0][j] = TOTAL_WEIGHT - wxyz[1][j];
+      }
+      // Cell numbering: 1 = +y, 2 = +z, 3 = +y+z, 4 = +x, 5 = +x+y,
+      // 6 = +x+z, 7 = +x+y+z (relative to cell 0).
+      weight[0] = wxyz[0][0] * wxyz[0][1] * wxyz[0][2];
+      weight[1] = wxyz[0][0] * wxyz[1][1] * wxyz[0][2];
+      weight[2] = wxyz[0][0] * wxyz[0][1] * wxyz[1][2];
+      weight[3] = wxyz[0][0] * wxyz[1][1] * wxyz[1][2];
+      weight[4] = wxyz[1][0] * wxyz[0][1] * wxyz[0][2];
+      weight[5] = wxyz[1][0] * wxyz[1][1] * wxyz[0][2];
+      weight[6] = wxyz[1][0] * wxyz[0][1] * wxyz[1][2];
+      weight[7] = wxyz[1][0] * wxyz[1][1] * wxyz[1][2];
+
+      // Base indices one cell outside the grid on either side are moved to
+      // the last cell. The weights above were computed before this change.
+      // NOTE(review): an index equal to M[j] is mapped to M[j] - 1 rather
+      // than wrapped to 0 -- confirm this is intended.
+      for (auto j = 0; j < LSS_DIMENSIONS; j++) {
+        if ((ii[0][j] == -1) || (ii[0][j] == M[j]))
+          ii[0][j] = M[j] - 1;
+      }
+
+      // Start every neighbour from the base cell, then shift it along the
+      // axes given by the numbering above, wrapping with the grid size.
+      for (auto cell = 1; cell < NR_CELLS_TOTAL; cell++) {
+        std::copy(std::begin(ii[0]), std::end(ii[0]), std::begin(ii[cell]));
+      }
+
+      ii[1][1]++;
+      ii[1][1] = (size_t)std::fmod(ii[1][1], M[1]);
+
+      ii[2][2]++;
+      ii[2][2] = (size_t)std::fmod(ii[2][2], M[2]);
+
+      ii[3][1]++;
+      ii[3][1] = (size_t)std::fmod(ii[3][1], M[1]);
+      ii[3][2]++;
+      ii[3][2] = (size_t)std::fmod(ii[3][2], M[2]);
+
+      ii[4][0]++;
+      ii[4][0] = (size_t)std::fmod(ii[4][0], M[0]);
+
+      ii[5][0]++;
+      ii[5][0] = (size_t)std::fmod(ii[5][0], M[0]);
+      ii[5][1]++;
+      ii[5][1] = (size_t)std::fmod(ii[5][1], M[1]);
+
+      ii[6][0]++;
+      ii[6][0] = (size_t)std::fmod(ii[6][0], M[0]);
+      ii[6][2]++;
+      ii[6][2] = (size_t)std::fmod(ii[6][2], M[2]);
+
+      for (auto j = 0; j < LSS_DIMENSIONS; j++) {
+        ii[7][j]++;
+        ii[7][j] = (size_t)std::fmod(ii[7][j], M[j]);
+      }
+
+      // Only cell 0 and cell 4 are tested against the local extent: cell 0
+      // decides for cells 0-3 (lower x slice), cell 4 for cells 4-7 (upper x
+      // slice).
+      for (auto j = 0; j < LSS_DIMENSIONS; j++) {
+        validLowerSlice = validLowerSlice &&
+                          (ii[0][j] >= base[j] && ii[0][j] < (base[j] + N[j]));
+        validUpperSlice = validUpperSlice &&
+                          (ii[4][j] >= base[j] && ii[4][j] < (base[j] + N[j]));
+      }
+      if (validLowerSlice) {
+        for (auto cell = 0; cell < NR_CELLS_SLICE; cell++) {
+          field(ii[cell]) += weight[cell] * h.w;
+          accepted++;
+        }
+      }
+      if (validUpperSlice) {
+        for (auto cell = NR_CELLS_SLICE; cell < NR_CELLS_TOTAL; cell++) {
+          field(ii[cell]) += weight[cell] * h.w;
+          accepted++;
+        }
+      }
+    }
+    Console::instance().print<LOG_VERBOSE>(
+        format("Project to grid: accept and assign halos to %d cells") %
+        accepted);
+    {
+      std::string cstr;
+      for (auto j = 0; j < LSS_DIMENSIONS; j++)
+        cstr += str(
+            format("(%lg - %lg) ") % found_corners[j][0] % found_corners[j][1]);
+      Console::instance().print<LOG_VERBOSE>(
+          "Project to grid: found corners " + cstr);
+    }
+    return accepted;
+  }
+
+  /// Deposit every halo of `sim` onto `field`: forwards to
+  /// haloSimToGridGeneric with a predicate that accepts everything.
+  template <
+      class GSurvey, typename DensityField, typename Grid, typename Dimension>
+  size_t haloSimToGrid_all(
+      const GSurvey &sim, DensityField &field, const Grid &M,
+      const Dimension &corner, const Dimension &L, const Dimension &d) {
+    typedef details::ProjectionAcceptAll<GSurvey> AcceptAll;
+    AcceptAll acceptAll;
+
+    return haloSimToGridGeneric<
+        GSurvey, DensityField, Grid, Dimension, AcceptAll>(
+        sim, field, M, corner, L, d, acceptAll);
+  }
+
+}; // namespace LibLSS
+
+// Register the ProjectionDataModel enumerators under the name
+// HDF5T_ProjectionDataModel. Placed outside namespace LibLSS, hence the
+// qualified names.
+CTOOL_ENUM_TYPE(
+    LibLSS::ProjectionDataModel, HDF5T_ProjectionDataModel,
+    (LibLSS::NGP_PROJECTION)(LibLSS::LUMINOSITY_CIC_PROJECTION))
+
+#endif
--- a/libLSS/data/schechter_completeness.hpp
+++ b/libLSS/data/schechter_completeness.hpp
@ -0,0 +1,159 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/schechter_completeness.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_SCHECHTER_COMPLETENESS_HPP
+#define __LIBLSS_SCHECHTER_COMPLETENESS_HPP
+
+#include <cmath>
+#include <functional>
+#include <boost/format.hpp>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/physics/cosmo.hpp"
+#include "libLSS/tools/gslIntegrate.hpp"
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/data/spectro_gals.hpp"
+#include "libLSS/data/galaxies.hpp"
+#include "libLSS/data/projection.hpp"
+
+namespace LibLSS {
+
+  typedef std::function<bool(const BaseGalaxyDescriptor &)> GalaxySelector;
+
+  /**
+   * Set of cuts describing a galaxy/halo sub-sample.
+   *
+   * The magnitude cuts and zmin/zmax are consumed by the Schechter
+   * completeness helpers in this header; the other cuts are only stored
+   * here. All members except `selector` are mirrored in the
+   * CTOOL_STRUCT_TYPE description at the end of this header.
+   */
+  struct GalaxySampleSelection {
+    // Apparent magnitude window of the sample (bright and faint limits).
+    double bright_apparent_magnitude_cut;
+    double faint_apparent_magnitude_cut;
+    // Absolute magnitude window of the sample (bright and faint limits).
+    double bright_absolute_magnitude_cut;
+    double faint_absolute_magnitude_cut;
+
+    // Redshift range; buildCompletenessFromSchechterFunction assigns zero
+    // completeness outside [zmin, zmax].
+    double zmin, zmax;
+    // Distance range. NOTE(review): units/meaning not visible here -- confirm.
+    double dmin, dmax;
+
+    // Halo property cuts (mass, radius, spin); not used in this header.
+    double low_mass_cut;
+    double high_mass_cut;
+    double small_radius_cut;
+    double large_radius_cut;
+    double low_spin_cut;
+    double high_spin_cut;
+
+    // The selector callback is held through a pointer rather than by value.
+    // NOTE(review): the original remark said this keeps the struct "trivial";
+    // a shared_ptr member is itself non-trivial, so the intent is presumably
+    // to keep the object layout simple enough for the HDF5 description --
+    // confirm. This member is not listed in the CTOOL_STRUCT_TYPE description.
+    std::shared_ptr<GalaxySelector> selector;
+    // Projection scheme associated with this sample.
+    ProjectionDataModel projection;
+  };
+
+  /**
+   * Wrap a selector callable into a heap-allocated, shared GalaxySelector
+   * suitable for GalaxySampleSelection::selector.
+   *
+   * @param f callable copied into the shared object.
+   * @return shared pointer owning the copy of `f`.
+   */
+  static inline std::shared_ptr<GalaxySelector> makeSelector(GalaxySelector f) {
+    auto shared_selector = std::make_shared<GalaxySelector>(f);
+    return shared_selector;
+  }
+
+  /**
+   * Parameters of the luminosity function used in this header.
+   *
+   * Mstar is the reference magnitude entering x = 10^(0.4 (Mstar - M));
+   * alpha is the exponent of the x^alpha * exp(-x) integrand.
+   */
+  struct SchechterParameters {
+    double Mstar, alpha;
+  };
+
+  namespace details {
+
+    /**
+     * Integrand x^alpha * exp(-x) evaluated at `x`, with alpha taken from
+     * `params`.
+     */
+    static inline double
+    _integrand_luminosity(const SchechterParameters &params, double x) {
+      const double power_law = std::pow(x, params.alpha);
+      const double cutoff = std::exp(-x);
+      return power_law * cutoff;
+    }
+
+    /**
+     * Integrate _integrand_luminosity over [x_min, x_max] with gslIntegrate,
+     * passing 1e-8 as its last argument (presumably the requested accuracy;
+     * see gslIntegrate for the exact meaning).
+     */
+    static inline double integral_luminosity(
+        const SchechterParameters &params, double x_min, double x_max) {
+      // Keep a private copy of the parameters inside the callable, as the
+      // bind-based version did.
+      auto integrand = [params](double x) {
+        return _integrand_luminosity(params, x);
+      };
+      return gslIntegrate(integrand, x_min, x_max, 1e-8);
+    }
+
+    /**
+     * Completeness of a magnitude-limited sample at redshift `z`.
+     *
+     * The apparent magnitude limits of `selection` are converted to absolute
+     * magnitudes using the luminosity distance cosmo.d2dlum(z, d_comoving)
+     * and the correction zcorrection(z), then clipped to the absolute
+     * magnitude window. The result is the ratio of the luminosity integral
+     * over the clipped window to the integral over the full absolute
+     * magnitude window, floored at zero.
+     *
+     * @param cosmo       cosmology used to obtain the luminosity distance.
+     * @param z           redshift of the evaluation point.
+     * @param d_comoving  distance passed to cosmo.d2dlum together with z.
+     * @param selection   magnitude cuts of the sample.
+     * @param params      Schechter parameters (Mstar, alpha).
+     * @param zcorrection magnitude correction as a function of redshift.
+     */
+    static inline double computeSchechterCompleteness(
+        const Cosmology &cosmo, double z, double d_comoving,
+        const GalaxySampleSelection &selection,
+        const SchechterParameters &params,
+        CorrectionFunction zcorrection = nullCorrection) {
+      using boost::format;
+      ConsoleContext<LOG_DEBUG> ctx("computeSchechterCompleteness");
+
+      const double d_lum = cosmo.d2dlum(z, d_comoving);
+      const double corr = zcorrection(z);
+
+      // Apparent -> absolute magnitude at this distance.
+      auto to_absolute = [&](double apparent) {
+        return apparent - 5 * std::log10(d_lum) - 25 - corr;
+      };
+      // Absolute magnitude -> luminosity in units of L*.
+      auto to_x = [&](double absolute) {
+        return std::pow(10.0, 0.4 * (params.Mstar - absolute));
+      };
+
+      const double faint_abs = selection.faint_absolute_magnitude_cut;
+      const double bright_abs = selection.bright_absolute_magnitude_cut;
+
+      double abmu = std::min(
+          to_absolute(selection.faint_apparent_magnitude_cut), faint_abs);
+      const double abml = std::max(
+          to_absolute(selection.bright_apparent_magnitude_cut), bright_abs);
+
+      ctx.print(
+          format("z = %lg d_lum = %lg abmu = %lg abml = %lg") % z % d_lum %
+          abmu % abml);
+
+      // The faint limit may not be brighter than the bright limit; this
+      // collapses the window to zero width in that case.
+      abmu = std::max(abmu, abml);
+
+      const double xl0 = to_x(abmu);
+      const double xu0 = to_x(abml);
+      const double xl1 = to_x(faint_abs);
+      const double xu1 = to_x(bright_abs);
+
+      ctx.print(
+          format("xl0 = %lg, xu0 = %lg, xl1 = %lg, xu1 = %lg") % xl0 % xu0 %
+          xl1 % xu1);
+
+      const double Phi0 = integral_luminosity(params, xl0, xu0);
+      const double Phi1 = integral_luminosity(params, xl1, xu1);
+
+      return std::max(0.0, Phi0 / Phi1);
+    }
+
+  } // namespace details
+
+  /**
+   * Tabulate the Schechter completeness on a regular grid of distances.
+   *
+   * Element i (i >= 1) of `completeness` is evaluated at d = i * Dmax / N,
+   * with N the number of elements of the array. Points whose redshift falls
+   * outside [selection.zmin, selection.zmax] get zero completeness. Element
+   * 0 (zero distance) is a copy of element 1.
+   *
+   * If the array has fewer than two elements there is no neighbour to copy
+   * from; the array is then left untouched instead of being accessed out of
+   * bounds.
+   *
+   * @param cosmo        cosmology used for the distance/redshift conversion.
+   * @param selection    cuts of the sample.
+   * @param params       Schechter parameters.
+   * @param completeness output array, indexed from 0 to N-1.
+   * @param Dmax         distance corresponding to index N.
+   * @param zcorr        magnitude correction as a function of redshift.
+   */
+  template <typename Array>
+  void buildCompletenessFromSchechterFunction(
+      const Cosmology &cosmo, const GalaxySampleSelection &selection,
+      const SchechterParameters &params, Array &completeness, double Dmax,
+      CorrectionFunction zcorr = details::nullCorrection) {
+
+    ConsoleContext<LOG_DEBUG> ctx("buildCompletenessFromSchechterFunction");
+    long N = completeness.num_elements();
+
+    // completeness[1] is required below; bail out rather than read past the
+    // end of a degenerate table.
+    if (N < 2) {
+      ctx.print(
+          "Completeness table has fewer than 2 samples, leaving it untouched");
+      return;
+    }
+
+    for (long i = 1; i < N; i++) {
+      double d = i * Dmax / N;
+      double z = cosmo.a2z(cosmo.com2a(cosmo.comph2com(d)));
+
+      if (z < selection.zmin || z > selection.zmax)
+        completeness[i] = 0;
+      else
+        completeness[i] = details::computeSchechterCompleteness(
+            cosmo, z, d, selection, params, zcorr);
+    }
+    // zero distance is hard, just copy the one next to it. If sampling is sufficient that will not matter.
+    completeness[0] = completeness[1];
+  }
+
+} // namespace LibLSS
+
+// HDF5 description of LibLSS::GalaxySampleSelection, one entry per member.
+// The `selector` member is intentionally absent from this list.
+CTOOL_STRUCT_TYPE(
+    LibLSS::GalaxySampleSelection, HDF5T_GalaxySampleSelection,
+    ((double, bright_apparent_magnitude_cut))
+    ((double, faint_apparent_magnitude_cut))
+    ((double, bright_absolute_magnitude_cut))
+    ((double, faint_absolute_magnitude_cut))
+    ((double, zmin))
+    ((double, zmax))
+    ((double, dmin))
+    ((double, dmax))
+    ((double, low_mass_cut))
+    ((double, high_mass_cut))
+    ((double, small_radius_cut))
+    ((double, large_radius_cut))
+    ((double, low_spin_cut))
+    ((double, high_spin_cut))
+    ((LibLSS::ProjectionDataModel, projection)));
+
+// HDF5 description of LibLSS::SchechterParameters (both members are doubles).
+CTOOL_STRUCT_TYPE(
+    LibLSS::SchechterParameters, HDF5T_SchechterParameters,
+    ((double, Mstar))((double, alpha)));
+
+#endif
--- a/libLSS/data/spectro_gals.hpp
+++ b/libLSS/data/spectro_gals.hpp
@ -0,0 +1,164 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/spectro_gals.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_DATA_GALACTIC_HPP
+#define __LIBLSS_DATA_GALACTIC_HPP
+
+#include <H5Cpp.h>
+#include <boost/utility/base_from_member.hpp>
+#include <boost/mpl/assert.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/multi_array.hpp>
+#include <boost/function.hpp>
+#include <healpix_cxx/pointing.h>
+#include "libLSS/data/base.hpp"
+#include "libLSS/tools/allocator_policy.hpp"
+#include "libLSS/tools/checkmem.hpp"
+#include "libLSS/physics/cosmo.hpp"
+#include "libLSS/tools/hdf5_type.hpp"
+
+namespace LibLSS
+{
+    /**
+     * Trivial selection function: a single radial function and a
+     * completeness of one everywhere.
+     */
+    class NoSelection {
+    public:
+        /// Number of radial selection functions (always one).
+        int getNumRadial() const { return 1; }
+
+        /// Radial selection; the arguments are ignored, the result is one.
+        double getRadialSelection(double /*r*/, int /*n*/) const { return 1; }
+
+        /// Sky completeness; the direction is ignored, the result is one.
+        double get_sky_completeness(double /*x*/, double /*y*/, double /*z*/) const {
+            return 1;
+        }
+    };
+
+    // Traits detecting the optional member functions saveFunction and
+    // loadFunction (HAS_MEM_FUNC comes from libLSS/tools/checkmem.hpp).
+    HAS_MEM_FUNC(saveFunction, has_save_function);
+    HAS_MEM_FUNC(loadFunction, has_load_function);
+
+    /* Optional persistence of the selection function.
+     *
+     * saveRadialCompleteness calls T::saveFunction(H5_CommonFileGroup&) when
+     * that member exists and does nothing otherwise; loadRadialCompleteness
+     * does the same with T::loadFunction. This lets GalaxySurvey save and
+     * restore as much as possible while staying compatible with selection
+     * functions that offer no persistence.
+     *
+     * Each enable_if/disable_if pair must test the SAME trait, so that exactly
+     * one overload is viable for any T, including types that implement only
+     * one of the two member functions.
+     */
+    namespace details {
+
+        /// Forward to func.saveFunction(fg) when T provides it.
+        template<typename T>
+        typename boost::enable_if< has_save_function<T, void (T::*)(H5_CommonFileGroup&)> >::type
+           saveRadialCompleteness(H5_CommonFileGroup& fg, T& func)
+        {
+            func.saveFunction(fg);
+        }
+
+        /// No-op fallback for types without saveFunction.
+        template<typename T>
+        typename boost::disable_if< has_save_function<T, void (T::*)(H5_CommonFileGroup&)> >::type
+           saveRadialCompleteness(H5_CommonFileGroup& fg, T& func)
+
+        {
+        }
+
+        /// Forward to func.loadFunction(fg) when T provides it.
+        template<typename T>
+        typename boost::enable_if< has_load_function<T, void (T::*)(H5_CommonFileGroup&)> >::type
+           loadRadialCompleteness(H5_CommonFileGroup& fg, T& func)
+        {
+            func.loadFunction(fg);
+        }
+
+        /// No-op fallback for types without loadFunction.
+        template<typename T>
+        typename boost::disable_if< has_load_function<T, void (T::*)(H5_CommonFileGroup&)> >::type
+           loadRadialCompleteness(H5_CommonFileGroup& fg, T& func)
+
+        {
+        }
+
+        /// Default magnitude correction: zero for every input.
+        static double nullCorrection(double d) { return 0; }
+
+    } // namespace details
+
+    typedef boost::function1<double, double> CorrectionFunction;
+
+    /**
+     * Container for a catalogue of objects of type GT together with a
+     * selection function of type SelFunction.
+     *
+     * Objects are stored in a one-dimensional boost::multi_array whose
+     * capacity may exceed the number of valid entries; surveySize() reports
+     * the number of valid entries and optimize() trims the storage to it.
+     * AllocationPolicy::getIncrement() gives the growth step used by
+     * addGalaxy().
+     */
+    template<typename SelFunction, class GT, class AllocationPolicy = DefaultAllocationPolicy>
+    class GalaxySurvey: virtual LibLSS::Base_Data
+    {
+    public:
+        typedef GT GalaxyType;
+        typedef GT& RefGalaxyType;
+        typedef const GT& ConstRefGalaxyType;
+        typedef typename boost::multi_array<GalaxyType, 1> GalaxyArray;
+    protected:
+        GalaxyArray galaxies;        // storage, possibly larger than numGalaxies
+        long numGalaxies;            // number of valid entries in `galaxies`
+        SelFunction radialSelection; // selection function attached to the survey
+        bool is_reference_survey;
+        CorrectionFunction zcorrection; // correction stored by setCorrections()
+    public:
+        /**
+         * Build an empty survey.
+         *
+         * The stored correction starts as details::nullCorrection, the same
+         * default as setCorrections(), so that it is never an empty
+         * boost::function (invoking an empty one throws).
+         */
+        GalaxySurvey(bool ref_survey = false)
+            : numGalaxies(0), is_reference_survey(ref_survey),
+              zcorrection(details::nullCorrection) {}
+        ~GalaxySurvey() {}
+
+        /// Access to the selection function.
+        SelFunction& selection() { return radialSelection; }
+        const SelFunction& selection() const { return radialSelection; }
+
+        /**
+         * Sky completeness in the direction (phi, theta).
+         *
+         * The direction is built from a pointing with colatitude
+         * 0.5*pi - theta and longitude phi, converted to a unit vector, and
+         * handed to SelFunction::get_sky_completeness.
+         */
+        double getCompleteness(double phi, double theta) {
+            vec3 v(pointing(0.5*M_PI - theta, phi));
+            return radialSelection.get_sky_completeness(v.x, v.y, v.z);
+        }
+
+        /// Replace the selection function by a copy of `f`.
+        void setSelectionFunction(SelFunction f) {
+            radialSelection = f;
+        }
+
+        bool isReferenceSurvey() const { return is_reference_survey; }
+
+        /// Unchecked access to the i-th stored object.
+        RefGalaxyType operator[](size_t i)  {
+            return galaxies[i];
+        }
+
+        ConstRefGalaxyType operator[](size_t i) const {
+            return galaxies[i];
+        }
+
+        /// Shrink the storage to exactly surveySize() entries.
+        void optimize() {
+           galaxies.resize(boost::extents[numGalaxies]);
+        }
+
+        /// Number of valid objects in the survey.
+        long surveySize() const { return numGalaxies; }
+
+        // Methods defined in the tcc file
+        void addGalaxy(const GalaxyType& g);
+
+        // I/O support for galaxy surveys
+        void saveMain(H5_CommonFileGroup& fg);
+        void restoreMain(H5_CommonFileGroup& fg);
+
+        /// Save the catalogue, then the selection function if it supports it.
+        void save(H5_CommonFileGroup& fg) {
+            saveMain(fg);
+            details::saveRadialCompleteness(fg, radialSelection);
+        }
+
+        /// Restore the catalogue, then the selection function if it supports it.
+        void restore(H5_CommonFileGroup& fg) {
+            restoreMain(fg);
+            details::loadRadialCompleteness(fg, radialSelection);
+        }
+
+        void updateComovingDistance(const Cosmology& cosmo, const CorrectionFunction& zcorrection = details::nullCorrection);
+
+        void useLuminosityAsWeight();
+        void resetWeight();
+
+        /// Store the correction function (defaults to the null correction).
+        void setCorrections(const CorrectionFunction& zcorrection = details::nullCorrection) { this->zcorrection = zcorrection; }
+
+        /// Direct access to the underlying storage.
+        GalaxyArray& getGalaxies() { return galaxies; }
+        const GalaxyArray& getGalaxies() const { return galaxies; }
+
+        /// Resize the storage to `numGals` entries, all counted as valid.
+        GalaxyArray& allocateGalaxies(size_t numGals) {
+            numGalaxies = numGals;
+            galaxies.resize(boost::extents[numGals]);
+            return galaxies;
+        }
+    };
+
+};
+
+#include "spectro_gals.tcc"
+
+#endif
--- a/libLSS/data/spectro_gals.tcc
+++ b/libLSS/data/spectro_gals.tcc
@ -0,0 +1,81 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/spectro_gals.tcc
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/hdf5_scalar.hpp"
+#include "libLSS/tools/hdf5_type.hpp"
+
+namespace LibLSS {
+
+/**
+ * Append a copy of `galaxy` to the survey. When every allocated slot is in
+ * use, the storage first grows by AllocationPolicy::getIncrement() entries.
+ */
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::addGalaxy(const GalaxyType& galaxy) {
+    const bool storageFull = (numGalaxies == galaxies.size());
+
+    if (storageFull) {
+        galaxies.resize(boost::extents[numGalaxies+AllocationPolicy::getIncrement()]);
+    }
+
+    galaxies[numGalaxies++] = galaxy;
+}
+
+/// Restore the working weight `final_w` of every object to its base weight `w`.
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::resetWeight() {
+  for (size_t idx = 0; idx < numGalaxies; idx++) {
+    auto& gal = galaxies[idx];
+    gal.final_w = gal.w;
+  }
+}
+
+/**
+ * Set the working weight of every object to its base weight multiplied by
+ * 10^(-0.4 * M_abs) / 1e8.
+ */
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::useLuminosityAsWeight() {
+  for (size_t idx = 0; idx < numGalaxies; idx++) {
+    auto& gal = galaxies[idx];
+    // The 10^8 rescaling keeps the values within a reasonable range for the MCMC.
+    const double luminosity = std::pow(10, -0.4*gal.M_abs)/1e8;
+    gal.final_w = gal.w * luminosity;
+  }
+}
+
+/**
+ * Write the catalogue to `fg`: the object array under "galaxies" and the
+ * reference flag under "is_reference_survey".
+ */
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::saveMain(H5_CommonFileGroup& fg)
+{
+    // Trim unused capacity first so that only valid entries are written.
+    optimize();
+    CosmoTool::hdf5_write_array(fg, "galaxies", galaxies );
+
+    hdf5_save_scalar(fg, "is_reference_survey", is_reference_survey);
+}
+
+/**
+ * Read back what saveMain wrote: the "galaxies" array and the
+ * "is_reference_survey" flag.
+ */
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::restoreMain(H5_CommonFileGroup& fg)
+{
+    CosmoTool::hdf5_read_array(fg, "galaxies", galaxies );
+    // Every entry read from the file counts as a valid object.
+    numGalaxies = galaxies.size();
+    is_reference_survey = hdf5_load_scalar<bool>(fg, "is_reference_survey");
+}
+
+
+/**
+ * Recompute the distance `r` and the absolute magnitude `M_abs` of every
+ * object from its redshift `z` and apparent magnitude `m`.
+ *
+ * Objects with a negative redshift get r = 0 and M_abs = +infinity. For the
+ * others, r is derived from z through `cosmo`, and
+ * M_abs = m - 5 log10(d_lum) - 25 - zcorrection(z), where d_lum is
+ * cosmo.d2dlum(z, r). The loop runs under an OpenMP parallel for, so
+ * `zcorrection` is invoked from several threads.
+ */
+template<typename SelFunction, class GalaxyType, class AllocationPolicy>
+void GalaxySurvey<SelFunction,GalaxyType,AllocationPolicy>::updateComovingDistance(const Cosmology& cosmo, const CorrectionFunction& zcorrection)
+{
+    LibLSS::ConsoleContext<LOG_DEBUG> ctx("Updating comoving positions of galaxies");
+#pragma omp parallel for
+    for (size_t idx = 0; idx < numGalaxies; idx++) {
+        auto& gal = galaxies[idx];
+
+        if (gal.z < 0) {
+          // Invalid redshift: park the object at the origin, infinitely faint.
+          gal.r = 0;
+          gal.M_abs = std::numeric_limits<double>::infinity();
+        } else {
+          gal.r = cosmo.com2comph(cosmo.a2com(cosmo.z2a(gal.z)));
+          const double lumDistance = cosmo.d2dlum(gal.z, gal.r);
+          const double magCorrection = zcorrection(gal.z);
+          gal.M_abs = gal.m - 5 * std::log10(lumDistance) - 25 - magCorrection;
+        }
+    }
+}
+
+};
--- a/libLSS/data/survey_load_bin.hpp
+++ b/libLSS/data/survey_load_bin.hpp
@ -0,0 +1,139 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/survey_load_bin.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_GALAXY_LOAD_BIN_HPP
+#define __LIBLSS_GALAXY_LOAD_BIN_HPP
+
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <boost/format.hpp>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/errors.hpp"
+#include <CosmoTool/hdf5_array.hpp>
+#include <H5Cpp.h>
+
+namespace LibLSS {
+
+
+  /**
+   * Record layout of one galaxy in an HDF5 catalogue, as read by
+   * loadCatalogFromHDF5. Field order matters: it matches the
+   * CTOOL_STRUCT_TYPE description below.
+   */
+  struct BinGalaxyStruct {
+     size_t id;          // object identifier
+     double phi, theta;  // angular position
+     double zo;          // copied to the galaxy's `zo` field by the loader
+     double m;           // apparent magnitude
+     double M_abs;       // absolute magnitude
+     double z;           // redshift
+     double w;           // weight (used for both `w` and `final_w`)
+  };
+
+  /**
+   * Record layout of one halo in an HDF5 catalogue, as read by
+   * loadHaloSimulationFromHDF5. Field order matters: it matches the
+   * CTOOL_STRUCT_TYPE description below.
+   */
+  struct BinHaloStruct {
+    size_t id;
+    // Mass, radius, spin, then position and velocity components.
+    double Mgal, radius, spin, posx, posy, posz, vx, vy, vz;
+    double w; // weight (used for both `w` and `final_w`)
+  };
+
+}
+
+// HDF5 description of LibLSS::BinGalaxyStruct, one entry per member in
+// declaration order.
+CTOOL_STRUCT_TYPE(LibLSS::BinGalaxyStruct, HDF5T_LibLSS_BinGalaxyStruct,
+  ((size_t, id))
+  ((double, phi))
+  ((double, theta))
+  ((double, zo))
+  ((double, m))
+  ((double, M_abs))
+  ((double, z))
+  ((double, w))
+);
+
+// HDF5 description of LibLSS::BinHaloStruct, one entry per member in
+// declaration order.
+CTOOL_STRUCT_TYPE(LibLSS::BinHaloStruct, HDF5T_LibLSS_BinHaloStruct,
+  ((size_t, id))
+  ((double, Mgal))
+  ((double, radius))
+  ((double, spin))
+  ((double, posx))
+  ((double, posy))
+  ((double, posz))
+  ((double, vx))
+  ((double, vy))
+  ((double, vz))
+  ((double, w))
+);
+
+namespace LibLSS {
+  /**
+   * Load a galaxy catalogue stored as an array of BinGalaxyStruct.
+   *
+   * The survey storage is resized to the number of records found under
+   * `key` in file `fname` (any previous content is replaced), and every
+   * record is copied field by field. Both `w` and `final_w` are set from
+   * the record weight.
+   *
+   * @param fname path of the HDF5 file, opened read-only.
+   * @param key   name of the dataset inside the file.
+   * @param sim   survey receiving the objects.
+   */
+  template<typename GalaxySurvey>
+  void  loadCatalogFromHDF5(
+      const std::string& fname,
+      const std::string& key,
+      GalaxySurvey& sim) {
+        using boost::format;
+        Console& cons = Console::instance();
+
+        cons.print<LOG_STD>(format("Reading HDF5 catalog file '%s' / key '%s'") % fname % key);
+
+        boost::multi_array<BinGalaxyStruct, 1> records;
+
+        H5::H5File f(fname, H5F_ACC_RDONLY) ;
+        CosmoTool::hdf5_read_array(f, key, records);
+
+        const size_t numRecords = records.num_elements();
+        auto& gals = sim.allocateGalaxies(numRecords);
+        for (size_t i = 0; i < numRecords; i++) {
+          const BinGalaxyStruct& rec = records[i];
+          gals[i].id = rec.id;
+          gals[i].phi = rec.phi;
+          gals[i].theta = rec.theta;
+          gals[i].final_w = gals[i].w = rec.w;
+          gals[i].m = rec.m;
+          gals[i].M_abs = rec.M_abs;
+          gals[i].z = rec.z;
+          gals[i].zo = rec.zo;
+        }
+        cons.print<LOG_STD>(format("Got %d halos") % gals.num_elements());
+   }
+
+  /**
+   * Load a halo catalogue stored as an array of BinHaloStruct.
+   *
+   * The survey storage is resized to the number of records found under
+   * `key` in file `fname` (any previous content is replaced), and every
+   * record is copied field by field. The angular position and distance
+   * (phi, theta, r) are derived from the Cartesian position with vec2ang.
+   *
+   * @param fname path of the HDF5 file, opened read-only.
+   * @param key   name of the dataset inside the file.
+   * @param sim   survey receiving the objects.
+   */
+  template<typename GalaxySurvey>
+  void  loadHaloSimulationFromHDF5(
+      const std::string& fname,
+      const std::string& key,
+      GalaxySurvey& sim) {
+        using boost::format;
+        Console& cons = Console::instance();
+
+        cons.format<LOG_STD>("Reading HDF5 catalog file '%s' / key '%s'", fname , key);
+
+        boost::multi_array<BinHaloStruct, 1> halos;
+
+        H5::H5File f(fname, H5F_ACC_RDONLY) ;
+        cons.print<LOG_STD>("Reading data file");
+        CosmoTool::hdf5_read_array(f, key, halos);
+        cons.print<LOG_STD>("Transfering to internal structure");
+
+        const size_t numHalos = halos.num_elements();
+        auto& gals = sim.allocateGalaxies(numHalos);
+        for (size_t i = 0; i < numHalos; i++) {
+          const BinHaloStruct& halo = halos[i];
+          gals[i].id = halo.id;
+          gals[i].final_w = gals[i].w = halo.w;
+          gals[i].posx = halo.posx;
+          gals[i].posy = halo.posy;
+          gals[i].posz = halo.posz;
+          gals[i].vx = halo.vx;
+          gals[i].vy = halo.vy;
+          gals[i].vz = halo.vz;
+          gals[i].spin = halo.spin;
+          gals[i].radius = halo.radius;
+          gals[i].Mgal = halo.Mgal;
+          // Fill the spherical coordinates from the Cartesian position.
+          vec2ang(std::array<double,3>{halo.posx,halo.posy,halo.posz}, gals[i].phi, gals[i].theta, gals[i].r);
+        }
+        cons.print<LOG_STD>(format("Got %d halos") % gals.num_elements());
+   }
+} 
+
+#endif
--- a/libLSS/data/survey_load_txt.hpp
+++ b/libLSS/data/survey_load_txt.hpp
@ -0,0 +1,108 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/survey_load_txt.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_GALAXY_LOAD_TXT_HPP
+#define __LIBLSS_GALAXY_LOAD_TXT_HPP
+
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <boost/format.hpp>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/errors.hpp"
+
+namespace LibLSS {
+
+  /**
+   * Append the galaxies listed in a text file to `survey`.
+   *
+   * Each non-blank line must hold, in order:
+   *   id phi theta zo m M_abs z [w]
+   * The weight column is optional and defaults to 1 (a warning is printed
+   * once if the default was used). The halo-specific fields (Mgal, r,
+   * radius, spin, position, velocity) are set to zero.
+   *
+   * Blank lines are ignored. Lines on which the mandatory columns cannot be
+   * parsed are skipped and reported in a single warning, instead of being
+   * added to the survey with unset fields.
+   *
+   * @param fname  path of the text file; ErrorIO is raised through
+   *               error_helper if it cannot be opened.
+   * @param survey survey receiving the galaxies; it is trimmed with
+   *               optimize() at the end.
+   */
+  template <typename GalaxySurvey>
+  void
+  loadGalaxySurveyFromText(const std::string &fname, GalaxySurvey &survey) {
+    using namespace std;
+    using boost::format;
+    Console &cons = Console::instance();
+    long originalSize = survey.surveySize();
+    string line;
+    size_t malformedLines = 0;
+
+    ifstream f(fname.c_str());
+
+    if (!f) {
+      error_helper<ErrorIO>(format("Cannot open file '%s'") % fname);
+    }
+
+    cons.print<LOG_STD>(format("Reading galaxy survey file '%s'") % fname);
+    bool warningDefault = false;
+    while (getline(f, line)) {
+      // Nothing to parse on an empty or whitespace-only line.
+      if (line.find_first_not_of(" \t\r\n") == string::npos)
+        continue;
+
+      istringstream ss(line);
+      typename GalaxySurvey::GalaxyType g;
+
+      // All seven leading columns are mandatory.
+      if (!(ss >> g.id >> g.phi >> g.theta >> g.zo >> g.m >> g.M_abs >> g.z)) {
+        malformedLines++;
+        continue;
+      }
+      g.Mgal = 0;
+      g.r = 0;
+      g.radius = 0;
+      g.spin = 0;
+      g.posx = g.posy = g.posz = 0;
+      g.vx = g.vy = g.vz = 0;
+      if (!(ss >> g.w)) {
+        g.w = 1;
+        warningDefault = true;
+      }
+      g.final_w = g.w;
+      survey.addGalaxy(g);
+    }
+    if (malformedLines > 0)
+      cons.print<LOG_WARNING>(
+          format("Skipped %d malformed line(s) in '%s'") % malformedLines %
+          fname);
+    if (warningDefault)
+      cons.print<LOG_WARNING>("I used a default weight of 1");
+    cons.print<LOG_STD>(
+        format("Receive %d galaxies in total") %
+        (survey.surveySize() - originalSize));
+    survey.optimize();
+  }
+
+  /**
+   * Append the halos listed in a text file to `sim`.
+   *
+   * Each non-blank line must hold, in order:
+   *   id Mgal radius spin posx posy posz vx vy vz [w]
+   * The weight column is optional and defaults to 1 (a warning is printed
+   * once if the default was used). The spherical coordinates (phi, theta, r)
+   * are derived from the Cartesian position with vec2ang.
+   *
+   * Blank lines are ignored. Lines on which the mandatory columns cannot be
+   * parsed are skipped and reported in a single warning, instead of being
+   * added to the catalogue with unset fields.
+   *
+   * @param fname path of the text file; ErrorIO is raised through
+   *              error_helper if it cannot be opened.
+   * @param sim   survey receiving the halos; it is trimmed with optimize()
+   *              once the file has been read.
+   */
+  template <typename GalaxySurvey>
+  void loadHaloSimulationFromText(const std::string &fname, GalaxySurvey &sim) {
+    using namespace std;
+    using boost::format;
+    Console &cons = Console::instance();
+    long originalSize = sim.surveySize();
+    string line;
+    size_t malformedLines = 0;
+
+    ifstream f(fname.c_str());
+
+    if (!f) {
+      error_helper<ErrorIO>(format("Cannot open file '%s'") % fname);
+    }
+
+    cons.print<LOG_STD>(format("Read halo catalog file '%s'") % fname);
+    bool warningDefault = false;
+    while (getline(f, line)) {
+      // Nothing to parse on an empty or whitespace-only line.
+      if (line.find_first_not_of(" \t\r\n") == string::npos)
+        continue;
+
+      istringstream ss(line);
+      typename GalaxySurvey::GalaxyType h;
+
+      // All ten leading columns are mandatory.
+      if (!(ss >> h.id >> h.Mgal >> h.radius >> h.spin >> h.posx >> h.posy >>
+            h.posz >> h.vx >> h.vy >> h.vz)) {
+        malformedLines++;
+        continue;
+      }
+
+      if (!(ss >> h.w)) {
+        h.w = 1;
+        warningDefault = true;
+      }
+      h.final_w = h.w;
+      vec2ang(
+          std::array<double, 3>{h.posx, h.posy, h.posz}, h.phi, h.theta, h.r);
+      sim.addGalaxy(h);
+    }
+    sim.optimize();
+    if (malformedLines > 0)
+      cons.print<LOG_WARNING>(
+          format("Skipped %d malformed line(s) in '%s'") % malformedLines %
+          fname);
+    if (warningDefault)
+      cons.print<LOG_WARNING>("Use default weight of 1 for all halos");
+    cons.print<LOG_STD>(
+        format("Receive %d halos in total") %
+        (sim.surveySize() - originalSize));
+  }
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/data/toto.cpp
+++ b/libLSS/data/toto.cpp
@ -0,0 +1,18 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/toto.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/seq/for_each.hpp>
+#include <boost/preprocessor/stringize.hpp>
+
+// Four-element Boost.Preprocessor sequence; note that `z` appears twice.
+#define SEQ (z)(x)(y)(z)
+
+// Stringize the sequence element, then token-paste `data` onto the result.
+#define MACRO(r, data, elem) BOOST_PP_CAT(BOOST_PP_STRINGIZE(elem), data)
+
+BOOST_PP_SEQ_FOR_EACH(MACRO, _, SEQ) // NOTE(review): should expand to "z"_ "x"_ "y"_ "z"_ (not w_ x_ y_ z_) -- confirm with the preprocessor output
--- a/libLSS/data/window3d.hpp
+++ b/libLSS/data/window3d.hpp
@ -0,0 +1,174 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/window3d.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_WINDOW_3D_HPP
+#define __LIBLSS_WINDOW_3D_HPP
+
+#include <cassert>
+#include <functional>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/openmp.hpp"
+#include "libLSS/samplers/rgen/gsl_miser.hpp"
+#include <CosmoTool/algo.hpp>
+#include <boost/array.hpp>
+#include <numeric>
+#include <cmath>
+
+namespace LibLSS {
+
+    namespace internalWindow {
+
+      /**
+       * Full selection at the Cartesian point k[0..2]: the sky completeness
+       * in the direction of the point multiplied by radial selection
+       * number 0 evaluated at the distance of the point from the origin.
+       */
+      template<typename SelFunction3d>
+      double selectionValue(double *k, const SelFunction3d& selfunc) {
+        const double x = k[0], y = k[1], z = k[2];
+        const double r = std::sqrt(x*x+y*y+z*z);
+
+        const double angular = selfunc.get_sky_completeness(x/r, y/r, z/r);
+        const double radial = selfunc.getRadialSelection(r, 0);
+        return angular * radial;
+      }
+
+      /**
+       * Sky completeness only, queried with the point k[0..2] as given
+       * (the vector is not normalised here).
+       */
+      template<typename SelFunction3d>
+      double selectionValue_just_sky(double *k, const SelFunction3d& selfunc) {
+        const double x = k[0], y = k[1], z = k[2];
+        return selfunc.get_sky_completeness(x, y, z);
+      }
+
+    }
+
+    /**
+     * Fill `selfuncData` with the voxel-averaged selection function.
+     *
+     * For every voxel of the local slab (first index from
+     * selfuncData.index_bases()[0] over selfuncData.shape()[0] planes) the
+     * selection function is integrated with GSL_Miser over the voxel volume
+     * and divided by that volume. When `filter_mask` is true, a first
+     * integral of the sky completeness alone is computed and voxels whose
+     * averaged value does not exceed 0.5 are set to zero without computing
+     * the full integral.
+     *
+     * @param comm        MPI communicator (not used in this function body).
+     * @param rng         random number generator handed to the integrator.
+     *                    NOTE(review): it is shared by all OpenMP threads;
+     *                    presumably it is thread-safe -- confirm.
+     * @param selfunc     selection function providing get_sky_completeness
+     *                    and getRadialSelection.
+     * @param selfuncData output 3d array.
+     * @param L           box lengths (only L[0] is read, and for logging).
+     * @param d           voxel sizes along each axis.
+     * @param xmin        coordinates of the centre of voxel (0,0,0).
+     * @param filter_mask enable the sky-only pre-pass described above.
+     * @param precision   controls the number of integrand calls per voxel.
+     */
+    template<typename RandomNum, typename SelFunction3d, typename SelFuncMask, typename Dimension>
+    void compute_window_value_elem(
+                    MPI_Communication *comm,
+                    RandomNum& rng,
+                    const SelFunction3d& selfunc,
+                    SelFuncMask& selfuncData,
+                    const Dimension& L,
+                    const Dimension& d,
+                    const Dimension& xmin, bool filter_mask,
+                    double precision = 0.01)
+    {
+        using boost::str;
+        using boost::format;
+
+        Console& cons = Console::instance();
+        // One progress counter per thread, summed by thread 0 for reporting.
+        boost::multi_array<int,1> count_elements(boost::extents[LibLSS::smp_get_max_threads()]);
+        size_t startN0 = selfuncData.index_bases()[0];
+        size_t localN0 = selfuncData.shape()[0], N1 = selfuncData.shape()[1], N2 = selfuncData.shape()[2];
+        double d0=d[0];
+        double d1=d[1];
+        double d2=d[2];
+        double xmin0 = xmin[0];
+        double xmin1 = xmin[1];
+        double xmin2 = xmin[2];
+        size_t N0 = L[0]/d0; // This is HACK (N0 is not used below)
+
+        double refVolume = CosmoTool::square(precision/0.01)*CosmoTool::cube(3);  // Ref is 3 Mpc,
+
+        // Number of integrand evaluations grows with the voxel volume
+        // relative to the reference volume.
+        size_t calls = 10 + size_t(1000 * (d0*d1*d2 / refVolume));
+
+        cons.indent();
+
+        Progress<LOG_STD>& p = cons.start_progress<LOG_STD>("3D Window", localN0*N1*N2, 2);
+
+        cons.print<LOG_INFO>(
+            format("Use a tolerance of %g on window function integral / calls = %d")
+                    % precision % calls);
+
+        std::fill(count_elements.begin(), count_elements.end(), 0);
+
+        // Flattened voxel index range covered by the local slab.
+        long job_start = startN0*N1*N2;
+        long job_end =  (startN0+localN0)*N1*N2;
+
+        cons.print<LOG_DEBUG>(
+            format("Window computation, MPI job_start=%ld job_end=%ld") % job_start % job_end
+        );
+        cons.print<LOG_DEBUG>(
+            format("Max threads = %d, ID = %d") % LibLSS::smp_get_max_threads() % LibLSS::smp_get_thread_id());
+        cons.print<LOG_DEBUG>(
+            format("d=[%g,%g,%g], L=[%g,%g,%g]") % d[0] % d[1] % d[2] % L[0] % L[1] % L[2]
+        );
+
+        double dV = d0*d1*d2;
+
+
+        typedef boost::multi_array_types::extent_range range;
+        // `dummy` flags the voxels that passed the mask test; `all_err`
+        // keeps the integration error estimate of those voxels.
+        boost::multi_array<bool,3> dummy(boost::extents[range(startN0,startN0+localN0)][N1][N2]);
+        boost::multi_array<double,3> all_err(boost::extents[range(startN0,startN0+localN0)][N1][N2]);
+        double mask_th = 0.5; //the voxel should have been observed at least to 50 percent
+
+#pragma omp parallel
+        {
+          // One integrator instance per thread.
+          GSL_Miser miser(3);
+
+#pragma omp for schedule(dynamic,100)
+          for(size_t i=job_start;i<job_end;i++) {
+            ///get 3d indices
+            size_t ii=(size_t) (i/N1/N2);
+            size_t jj=(size_t) (i/N2 - ii *N1);
+            size_t kk=(size_t) (i-jj*N2-ii*N2*N1);
+
+            // Voxel centre, then lower/upper corners of the integration box.
+            double
+              x = double(ii)*d0+xmin0,
+              y = double(jj)*d1+xmin1,
+              z = double(kk)*d2+xmin2;
+            double err;
+            boost::array<double, 3> xl{x - 0.5*d0, y-0.5*d1, z-0.5*d2};
+            boost::array<double, 3> xu{x + 0.5*d0, y + 0.5*d1, z + 0.5*d2};
+
+            //here we do a pre-run, where we project the sky completeness into 3d
+            double auxval;
+            if (filter_mask) {
+              auxval = miser.integrate(rng,
+                    std::bind(&internalWindow::selectionValue_just_sky<SelFunction3d>, std::placeholders::_1, std::cref(selfunc)),
+                    xl, xu, calls, err) / (dV);
+            } else {
+              // No filtering requested: force the threshold test to pass.
+              auxval =  mask_th*2;
+            }
+
+            //avoid double calculations for uninteresting mask regions
+            if(auxval > mask_th) {
+              dummy[ii][jj][kk]=true;
+
+              selfuncData[ii][jj][kk] =
+                miser.integrate(rng,
+                      std::bind(&internalWindow::selectionValue<SelFunction3d>, std::placeholders::_1, std::cref(selfunc)),
+                      xl, xu, calls, err) / (dV);
+               all_err[ii][jj][kk] = err;
+            } else {
+              selfuncData[ii][jj][kk] = 0.;
+            }
+
+            assert(LibLSS::smp_get_thread_id() < LibLSS::smp_get_max_threads());
+            count_elements[LibLSS::smp_get_thread_id()]++;
+            // Only thread 0 updates the progress display; it reads the other
+            // threads' counters without synchronisation, so the reported
+            // value is approximate.
+            if (LibLSS::smp_get_thread_id() == 0) {
+                int done = std::accumulate(count_elements.begin(), count_elements.end(), 0);
+                p.update(done);
+            }
+          }
+        }
+
+        cons.unindent();
+        p.destroy();
+// Disabled debugging dump of the error estimates and of the selection array.
+if (false)
+        {
+          H5::H5File f("window_err.h5", H5F_ACC_TRUNC);
+          CosmoTool::hdf5_write_array(f, "errors", all_err);
+          CosmoTool::hdf5_write_array(f, "sel", selfuncData);
+        }
+
+        ///now delete from mask
+        #pragma omp parallel for collapse(3)
+        for (size_t n0 = startN0; n0 < startN0+localN0; n0++) {
+            for (size_t n1 = 0; n1 < N1; n1++) {
+                for (size_t n2 = 0; n2 < N2; n2++) {
+                  if(!dummy[n0][n1][n2])
+                    selfuncData[n0][n1][n2] = 0.;
+                }
+              }
+        }
+    }
+};
+
+#endif
--- a/libLSS/data/window3d_post.hpp
+++ b/libLSS/data/window3d_post.hpp
@ -0,0 +1,105 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/data/window3d_post.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_DATA_WINDOW_3D_POST_HPP
+#define __LIBLSS_DATA_WINDOW_3D_POST_HPP
+
+#include <cassert>
+#include <functional>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/openmp.hpp"
+#include <CosmoTool/algo.hpp>
+#include <boost/array.hpp>
+#include <numeric>
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/tools/fusewrapper.hpp"
+
+namespace LibLSS {
+
+    namespace convolveDetails {
+      /**
+       * Write a separable 3x3x3 kernel into `real_data`.
+       *
+       * Along each axis the weight is 1 at offset 0 and 0.5 at offsets +/-1;
+       * the kernel value is the product of the three axis weights. Negative
+       * offsets are wrapped to the end of the axis (N + offset). Planes of
+       * the first axis for which mgr.on_core() is false are skipped.
+       * Elements outside the 27 kernel cells are not touched.
+       */
+      template<typename RealArray>
+      void buildKernel(FFTW_Manager_3d<double>& mgr, RealArray& real_data) {
+        constexpr double KernelCore[2] = {1, 0.5};
+        decltype(mgr.N0) N[3] = {mgr.N0, mgr.N1, mgr.N2};
+
+        for (int di = -1; di <= 1; di++) {
+          const size_t plane = (di < 0) ? (N[0]+di) : di;
+          if (!mgr.on_core(plane))
+            continue;
+          const double wi = KernelCore[std::abs(di)];
+
+          for (int dj = -1; dj <= 1; dj++) {
+            const size_t row = (dj < 0) ? (N[1]+dj) : dj;
+            const double wij = wi * KernelCore[std::abs(dj)];
+
+            for (int dk = -1; dk <= 1; dk++) {
+              const size_t col = (dk < 0) ? (N[2]+dk) : dk;
+
+              real_data[plane][row][col] = wij * KernelCore[std::abs(dk)];
+            }
+          }
+        }
+      }
+    }
+
+    /**
+     * Convolve the selection array with the 3x3x3 kernel of
+     * convolveDetails::buildKernel and use the result to mask `sel_array`.
+     *
+     * The convolution is done in Fourier space with an FFTW_Manager_3d built
+     * for an N[0] x N[1] x N[2] grid. `sel_array` itself is not replaced by
+     * the convolved field: an element keeps its value when it is positive
+     * and the convolved field is non-negative at that position, and is set
+     * to zero otherwise.
+     *
+     * @param comm      MPI communicator handed to the FFTW manager.
+     * @param sel_array selection array, modified in place.
+     * @param N         pointer to the three grid dimensions.
+     */
+    template<typename SelectionArray, typename T>
+    void convolve_selection_cic(MPI_Communication *comm, SelectionArray& sel_array, T const* N)
+    {
+
+      typedef FFTW_Manager_3d<double> DFT_Manager;
+      typedef typename DFT_Manager::plan_type plan_type;
+      ConsoleContext<LOG_DEBUG> ctx("convolution of selection function");
+
+      DFT_Manager mgr(N[0], N[1], N[2], comm);
+
+      Uninit_FFTW_Real_Array real_data_p(mgr.extents_real(), mgr.allocator_real);
+      Uninit_FFTW_Complex_Array complex_data_p(mgr.extents_complex(), mgr.allocator_complex);
+      Uninit_FFTW_Complex_Array kernel_data_p(mgr.extents_complex(), mgr.allocator_complex);
+      auto real_data = real_data_p.get_array();
+      auto complex_data = complex_data_p.get_array();
+      auto kernel_data = kernel_data_p.get_array();
+      auto wc = fwrap(complex_data);
+      auto wr = fwrap(real_data);
+      auto kc = fwrap(kernel_data);
+
+      ctx.print("Create plans");
+      plan_type analysis_plan = mgr.create_r2c_plan(real_data.data(), complex_data.data());
+      plan_type synthesis_plan = mgr.create_c2r_plan(complex_data.data(), real_data.data());
+
+      ctx.print("Kernel building");
+      // The kernel only writes its 27 cells, so clear the buffer first.
+      wr = 0;
+      convolveDetails::buildKernel(mgr, real_data);
+      mgr.execute_r2c(analysis_plan, real_data.data(), kernel_data.data());
+
+      ctx.print("Convolve");
+      LibLSS::copy_array(real_data, sel_array);
+      mgr.execute_r2c(analysis_plan, real_data.data(), complex_data.data());
+
+      // Normalisation of the forward/backward transform pair. The product of
+      // the dimensions is formed in double precision: in type T it could
+      // overflow for 32-bit integer dimensions on large grids.
+      const double normalization =
+          1.0 / (double(N[0]) * double(N[1]) * double(N[2]));
+      wc = wc * kc * normalization;
+
+      mgr.execute_c2r(synthesis_plan, complex_data.data(), real_data.data());
+
+      auto S = fwrap(sel_array) ;
+
+      // This is a mask operation, if the condition is true, then S is copied on itself (thus noop).
+      // If the condition is false, then the value is cleared with zero. The wrapping of constant
+      // is not trivial at the moment.
+      S = mask((S>0)&&(wr>=0), S, fwrap(S.fautowrap(0)));
+
+      ctx.print("Cleaning up");
+      mgr.destroy_plan(analysis_plan);
+      mgr.destroy_plan(synthesis_plan);
+    }
+
+}
+
+#endif
--- a/libLSS/mcmc/global_state.hpp
+++ b/libLSS/mcmc/global_state.hpp
@ -0,0 +1,513 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mcmc/global_state.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef _GLOBAL_STATE_HPP
+#define _GLOBAL_STATE_HPP
+
+#include <boost/type_traits/is_base_of.hpp>
+#include <boost/format.hpp>
+#include <functional>
+#include <set>
+#include <typeindex>
+#include <algorithm>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/mcmc/state_element.hpp"
+
+namespace LibLSS {
+
+  /**
+   * @brief This is the class that manages the dictionary that is saved in each MCMC/Restart file.
+   * 
+   * It is *not* copy-constructible.
+   */
+  class MarkovState {
+  public:
+    typedef std::map<std::string, bool> SaveMap;
+    typedef std::map<std::string, StateElement *> StateMap;
+    typedef std::map<std::string, std::type_index> TypeMap;
+    typedef std::set<std::string> Requirements;
+
+  private:
+    SaveMap save_map;
+    StateMap state_map, toProcess;
+    TypeMap type_map;
+    std::list<std::tuple<Requirements, std::function<void()>>> postLoad;
+    std::set<std::string> loaded;
+
+  public:
+    MarkovState(MarkovState const &) = delete;
+
+    /**
+     * @brief Construct a new empty Markov State object.
+     * 
+     */
+    MarkovState() {}
+
+    /**
+     * @brief Destroy the Markov State object.
+     * 
+     * All the elements stored in the dictionnary will be destroyed, as the ownership
+     * is given the dictionnary implicitly when the element is added to it.
+     */
+    ~MarkovState() {
+      // Walk the dictionary, announce each entry and release the object it
+      // points to.
+      for (auto &entry : state_map) {
+        Console::instance().print<LOG_VERBOSE>(
+            boost::format("Destroying %s") % entry.first);
+        delete entry.second;
+      }
+      save_map.clear();
+    }
+
+    template <typename T>
+    static void check_class() {
+      BOOST_MPL_ASSERT_MSG(
+          (boost::is_base_of<StateElement, T>::value), T_is_not_a_StateElement,
+          ());
+    }
+
+    /**
+     * @brief Function to access by its name an element stored in the dictionnary.
+     *  
+     * This function makes a lookup and a dynamic cast to the specified template "StateElement".
+     * It tries to find the indicated state element by name. If it fails an error is thrown.
+     * A dynamic cast is then issued to ensure that the stored type is the same as the requested one.
+     * 
+     * @tparam T     type of the element, cast will be checked
+     * @param name   string id of the element
+     * @return T*    pointer to the element
+     */
+    template <typename T>
+    T *get(const std::string &name) {
+      check_class<T>();
+      auto const found = state_map.find(name);
+      // A missing key and a null entry are both reported as a bad state.
+      if (found == state_map.end() || found->second == nullptr) {
+        error_helper<ErrorBadState>(
+            boost::format("Invalid access to %s") % name);
+      }
+      // Checked downcast: a null result means the stored object is not a T.
+      T *const typed = dynamic_cast<T *>(found->second);
+      if (typed == nullptr) {
+        error_helper<ErrorBadCast>(
+            boost::format("Bad cast in access to %s") % name);
+      }
+      return typed;
+    }
+
+    /**
+     * @brief Access using a boost::format object.
+     * 
+     * @tparam T 
+     * @param f 
+     * @return T* 
+     */
+    template <typename T>
+    T *get(const boost::format &f) {
+      return get<T>(f.str());
+    }
+
+    static void _format_expansion(boost::format &f) {}
+
+    template <typename A, typename... U>
+    static void _format_expansion(boost::format &f, A &&a, U &&... u) {
+      _format_expansion(f % a, u...);
+    }
+
+    template <typename T, typename... Args>
+    T *formatGet(std::string const &s, Args &&... args) {
+      boost::format f(s);
+      _format_expansion(f, std::forward<Args>(args)...);
+      return get<T>(f);
+    }
+
+    template <typename T>
+    const T *get(const boost::format &f) const {
+      return get<T>(f.str());
+    }
+
+    template <typename T>
+    const T *get(const std::string &name) const {
+      check_class<T>();
+      auto const found = state_map.find(name);
+      // A missing key and a null entry are both reported as a bad state.
+      if (found == state_map.end() || found->second == nullptr) {
+        error_helper<ErrorBadState>(
+            boost::format("Invalid access to %s") % name);
+      }
+
+      // Checked downcast: a null result means the stored object is not a T.
+      const T *const typed = dynamic_cast<const T *>(found->second);
+      if (typed == nullptr) {
+        error_helper<ErrorBadCast>(
+            boost::format("Bad cast in access to %s") % name);
+      }
+      return typed;
+    }
+
+    /**
+     * @brief Check existence of an element in the dictionnary.
+     * 
+     * @param name   string id of the element
+     * @return true  if it exists
+     * @return false if it does not exist
+     */
+    bool exists(const std::string &name) const {
+      return state_map.find(name) != state_map.end();
+    }
+
+    /**
+     * @brief Access an element through operator [] overload.
+     * 
+     * @param name 
+     * @return StateElement& 
+     */
+    StateElement &operator[](const std::string &name) {
+      return *get<StateElement>(name);
+    }
+
+    const StateElement &operator[](const std::string &name) const {
+      return *get<StateElement>(name);
+    }
+
+    std::type_index getStoredType(const std::string &name) const {
+      // Fetch the type recorded for this name in type_map; unknown names are
+      // reported through error_helper.
+      TypeMap::const_iterator const entry = type_map.find(name);
+      if (entry == type_map.end()) {
+        error_helper<ErrorBadState>(
+            "Unknown entry " + name + " during type query");
+      }
+      return entry->second;
+    }
+
+    /**
+     * @brief Add an element in the dictionnary.
+     * 
+     * @param name  string id of the new element
+     * @param elt   Object to add in the dictionnary. The ownership is transferred to MarkovState.
+     * @param write_to_snapshot indicate, if true, that the element has to be written in mcmc files
+     * @return StateElement* the same object as "elt", used to daisy chain calls.
+     */
+    template <typename T>
+    T *newElement(
+        const std::string &name, T *elt,
+        const bool &write_to_snapshot = false) {
+      static_assert(
+          std::is_base_of<StateElement, T>::value,
+          "newElement accepts only StateElement based objects");
+      // NOTE(review): when `name` is already registered the previous pointer is
+      // overwritten here without being deleted -- confirm whether callers rely
+      // on re-registering names.
+      state_map[name] = elt;
+      // std::map::insert leaves an existing key untouched, which would keep a
+      // stale type when a name is registered again with another element type.
+      // Drop any previous record first so that getStoredType() always
+      // describes the element currently held in state_map.
+      type_map.erase(name);
+      type_map.insert(std::pair<std::string, std::type_index>(
+          name, std::type_index(typeid(T))));
+      // Queue the element for the next restoreState() round.
+      toProcess[name] = elt;
+      elt->name = name;
+      set_save_in_snapshot(name, write_to_snapshot);
+      return elt;
+    }
+
+    /**
+     * @brief Add an element in the dictionnary.
+     * 
+     * @param f   boost::format object used to build the string-id
+     * @param elt  Object to add in the dictionnary. The ownership is transferred to MarkovState.
+     * @param write_to_snapshot indicate, if true, that the element has to be written in mcmc files
+     * @return StateElement* the same object as "elt", used to daisy chain calls.
+     */
+    template <typename T>
+    T *newElement(
+        const boost::format &f, T *elt, const bool &write_to_snapshot = false) {
+      return newElement(f.str(), elt, write_to_snapshot);
+    }
+
+    /**
+     * @brief Get the content of a series of variables into a static array
+     * That function is an helper to retrieve the value of a series "variable0",
+     * "variable1", ..., "variableQ" of ScalarElement of type Scalar (with Q=N-1).
+     * Such a case is for the length:
+     * @code
+     *  double L[3];
+     *  state.getScalarArray<double, 3>("L", L);
+     * @endcode
+     * This will retrieve L0, L1 and L2 and store their values (double precision
+     * floats) in L[0], L[1], L[2].
+     * 
+     * @tparam Scalar inner type of the variable to be retrieved in the dictionnary 
+     * @tparam N      number of elements
+     * @param prefix  prefix for these variables
+     * @param scalars output scalar array
+     */
+    template <typename Scalar, size_t N, typename ScalarArray>
+    void getScalarArray(const std::string &prefix, ScalarArray &&scalars) {
+      for (unsigned int i = 0; i < N; i++) {
+        scalars[i] = getScalar<Scalar>(prefix + std::to_string(i));
+      }
+    }
+
+    ///@deprecated
+    template <typename Scalar>
+    Scalar &getSyncScalar(const std::string &name) {
+      return this->template get<SyncableScalarStateElement<Scalar>>(name)
+          ->value;
+    }
+
+    ///@deprecated
+    template <typename Scalar>
+    Scalar &getSyncScalar(const boost::format &name) {
+      return this->template getSyncScalar<Scalar>(name.str());
+    }
+
+    /**
+     * @brief Get the value of a scalar object.
+     * 
+     * @tparam Scalar 
+     * @param name 
+     * @return Scalar& 
+     */
+    template <typename Scalar>
+    Scalar &getScalar(const std::string &name) {
+      return this->template get<ScalarStateElement<Scalar>>(name)->value;
+    }
+
+    template <typename Scalar>
+    Scalar &getScalar(const boost::format &name) {
+      return this->template getScalar<Scalar>(name.str());
+    }
+
+    template <typename Scalar, typename... U>
+    Scalar &formatGetScalar(std::string const &name, U &&... u) {
+      return this
+          ->template formatGet<ScalarStateElement<Scalar>>(
+              name, std::forward<U>(u)...)
+          ->value;
+    }
+
+    template <typename Scalar>
+    ScalarStateElement<Scalar> *newScalar(
+        const std::string &name, Scalar x,
+        const bool &write_to_snapshot = false) {
+      // Allocate the holder, seed it with x, then register it in the
+      // dictionary; newElement returns the pointer it was given.
+      auto *holder = new ScalarStateElement<Scalar>();
+      holder->value = x;
+      return newElement(name, holder, write_to_snapshot);
+    }
+
+    template <typename Scalar>
+    ScalarStateElement<Scalar> *newScalar(
+        const boost::format &name, Scalar x,
+        const bool &write_to_snapshot = false) {
+      return this->newScalar(name.str(), x, write_to_snapshot);
+    }
+
+    ///@deprecated
+    template <typename Scalar>
+    SyncableScalarStateElement<Scalar> *newSyScalar(
+        const std::string &name, Scalar x,
+        const bool &write_to_snapshot = false) {
+      SyncableScalarStateElement<Scalar> *elt =
+          new SyncableScalarStateElement<Scalar>();
+
+      elt->value = x;
+      newElement(name, elt, write_to_snapshot);
+      return elt;
+    }
+
+    ///@deprecated
+    template <typename Scalar>
+    SyncableScalarStateElement<Scalar> *newSyScalar(
+        const boost::format &name, Scalar x,
+        const bool &write_to_snapshot = false) {
+      return this->newSyScalar(name.str(), x, write_to_snapshot);
+    }
+
+    /**
+     * @brief Call syncData on every element with a functor that forwards to
+     * MPI_Communication::broadcast using the given root.
+     *
+     * @deprecated
+     * @param comm communicator whose broadcast member is invoked
+     * @param root rank passed as last argument of broadcast
+     */
+    void mpiSync(MPI_Communication &comm, int root = 0) {
+      namespace ph = std::placeholders;
+      // std::bind stores its bound arguments by value: binding `comm` itself
+      // would copy the communicator object for every element. Bind its address
+      // instead, and build the functor only once for the whole loop.
+      auto do_broadcast = std::bind(
+          &MPI_Communication::broadcast, &comm, ph::_1, ph::_2, ph::_3, root);
+      for (auto &entry : state_map) {
+        entry.second->syncData(do_broadcast);
+      }
+    }
+
+    void set_save_in_snapshot(const std::string &name, const bool save) {
+      save_map[name] = save;
+    }
+
+    void set_save_in_snapshot(const boost::format &name, const bool save) {
+      set_save_in_snapshot(name.str(), save);
+    }
+
+    /**
+     * @brief Tell whether an element is flagged to be written in snapshots.
+     *
+     * The query does not modify the dictionary and is therefore usable on a
+     * const MarkovState. Names without a recorded flag are reported through
+     * error_helper<ErrorBadState>.
+     *
+     * @param name string id of the element
+     * @return the flag stored by set_save_in_snapshot for that name
+     */
+    bool get_save_in_snapshot(const std::string &name) const {
+      SaveMap::const_iterator i = save_map.find(name);
+      if (i == save_map.end()) {
+        error_helper<ErrorBadState>(
+            boost::format("Invalid access to %s") % name);
+      }
+      return i->second;
+    }
+
+    /// Same query, with the string id given as a boost::format object.
+    bool get_save_in_snapshot(const boost::format &name) const {
+      return get_save_in_snapshot(name.str());
+    }
+
+    /**
+     * @brief Save the full content of the dictionnary into the indicated HDF5 group.
+     * 
+     * @param fg HDF5 group/file to save the state in.
+     */
+    void saveState(H5_CommonFileGroup &fg) {
+      ConsoleContext<LOG_DEBUG> ctx("saveState");
+      // Group shared by all the elements reporting isScalar().
+      H5::Group g_scalar = fg.createGroup("scalars");
+      for (auto const &entry : state_map) {
+        std::string const &key = entry.first;
+        StateElement *const element = entry.second;
+
+        ctx.print("Saving " + key);
+        if (!element->isScalar()) {
+          // The other elements get a group of their own, named after them.
+          H5::Group g = fg.createGroup(key);
+          element->saveTo(g);
+          continue;
+        }
+        element->saveTo(g_scalar);
+      }
+    }
+
+    /**
+     * @brief Save the full content of the dictionnary into the indicated HDF5 group.
+     * This is the MPI parallel variant.
+     * 
+     * @param fg HDF5 group/file to save the state in.
+     */
+    void mpiSaveState(
+        std::shared_ptr<H5_CommonFileGroup> fg, MPI_Communication *comm,
+        bool reassembly, const bool write_snapshot = false) {
+      ConsoleContext<LOG_VERBOSE> ctx("mpiSaveState");
+      H5::Group g_scalar;
+      boost::optional<H5_CommonFileGroup &> g_scalar_opt;
+
+      // The "scalars" group is only created when a file handle was provided;
+      // otherwise the optional handed to the elements stays empty.
+      if (fg) {
+        g_scalar = fg->createGroup("scalars");
+        g_scalar_opt = g_scalar;
+      }
+
+      for (auto const &entry : state_map) {
+        std::string const &key = entry.first;
+        StateElement *const element = entry.second;
+
+        // In snapshot mode, elements not flagged for snapshots are left out.
+        if (write_snapshot && !get_save_in_snapshot(key)) {
+          ctx.print("Skip saving " + key);
+          continue;
+        }
+        ctx.print("Saving " + key);
+        if (element->isScalar()) {
+          element->saveTo(g_scalar_opt, comm, reassembly);
+          continue;
+        }
+
+        // Non scalar elements are saved in a group named after them, created
+        // under the same condition as the "scalars" group.
+        H5::Group g;
+        boost::optional<H5_CommonFileGroup &> g_opt;
+        if (fg) {
+          g = fg->createGroup(key);
+          g_opt = g;
+        }
+        element->saveTo(g_opt, comm, reassembly);
+      }
+    }
+
+    void restoreStateWithFailure(H5_CommonFileGroup &fg) {
+      Console &cons = Console::instance();
+      H5::Group g_scalar = fg.openGroup("scalars");
+      for (auto const &entry : state_map) {
+        std::string const &key = entry.first;
+        StateElement *const element = entry.second;
+
+        cons.print<LOG_VERBOSE>("Attempting to restore " + key);
+#if H5_VERSION_GE(1, 10, 1)
+        // Entries absent from the "scalars" group are skipped with a warning.
+        // NOTE(review): the lookup is done in "scalars" even for elements that
+        // saveState() stores in a group of their own -- confirm this is wanted.
+        if (!g_scalar.nameExists(key)) {
+          cons.print<LOG_WARNING>("Failure to restore");
+          continue;
+        }
+#endif
+        if (element->isScalar()) {
+          // Partial is only valid for 'scalar' types.
+          element->loadFrom(g_scalar, false);
+        } else {
+          H5::Group g = fg.openGroup(key);
+          element->loadFrom(g);
+        }
+      }
+    }
+
+    /**
+     * @brief Run a function once all the indicated requirements have been
+     * loaded from the restart file.
+     *
+     * If every requirement is already recorded as loaded (this includes an
+     * empty requirement set), the function is called immediately. Otherwise it
+     * is queued and called by triggerPostRestore().
+     *
+     * @param requirements names of the elements that must have been loaded
+     * @param f            function to call
+     */
+    void subscribePostRestore(
+        Requirements const &requirements, std::function<void()> f) {
+      // std::includes(A, B) answers "is B a subset of A": the range that must
+      // contain the other one goes first. Both ranges are std::set contents,
+      // hence sorted as the algorithm demands.
+      if (std::includes(
+              loaded.begin(), loaded.end(), requirements.begin(),
+              requirements.end())) {
+        f();
+        return;
+      }
+      postLoad.push_back(std::make_tuple(requirements, f));
+    }
+
+    /**
+     * @brief Record that an element has been loaded and run the queued
+     * functions whose requirements are now all loaded.
+     *
+     * Each function that is run is removed from the queue.
+     *
+     * @param n name of the element that has just been loaded
+     */
+    void triggerPostRestore(std::string const &n) {
+      loaded.insert(n);
+      auto i = postLoad.begin();
+      while (i != postLoad.end()) {
+        auto const &req = std::get<0>(*i);
+        // Keep waiting while at least one requirement is missing from loaded.
+        if (!std::includes(
+                loaded.begin(), loaded.end(), req.begin(), req.end())) {
+          ++i;
+          continue;
+        }
+        std::get<1>(*i)();
+        // erase() returns the iterator following the removed entry.
+        i = postLoad.erase(i);
+      }
+    }
+
+    /**
+     * @brief Restore the elements of the dictionary from an HDF5 group/file.
+     *
+     * Elements registered while the restoration is running are handled in
+     * additional rounds, until no new element shows up. The program is aborted
+     * if some post-restore functions are still pending at the end.
+     *
+     * @param fg            HDF5 group/file to read from; its "scalars" group is opened
+     * @param partial       forwarded as partialLoad to the elements reporting isScalar()
+     * @param loadSnapshot  if true, elements not flagged for snapshots are skipped
+     * @param acceptFailure if true, entries missing from "scalars" are skipped with a
+     *                      warning (only compiled in for HDF5 >= 1.10.1)
+     */
+    void restoreState(
+        H5_CommonFileGroup &fg, bool partial = false, bool loadSnapshot = false,
+        bool acceptFailure = false) {
+      Console &cons = Console::instance();
+      H5::Group g_scalar = fg.openGroup("scalars");
+      StateMap currentMap = state_map; // Protect against online modifications
+      // newElement() fills toProcess for every registration, so at this point
+      // it only repeats entries already present in currentMap. Forget them:
+      // the follow-up rounds must only cover elements created while restoring,
+      // not load the whole dictionary a second time.
+      toProcess.clear();
+
+      do {
+        for (StateMap::iterator i = currentMap.begin(); i != currentMap.end();
+             ++i) {
+          if (loadSnapshot && !get_save_in_snapshot(i->first))
+            continue;
+
+          cons.print<LOG_VERBOSE>("Restoring " + i->first);
+#if H5_VERSION_GE(1, 10, 1)
+          // NOTE(review): the lookup is done in "scalars" even for elements
+          // stored in a group of their own -- confirm this is wanted.
+          if (acceptFailure && !g_scalar.nameExists(i->first)) {
+            cons.print<LOG_WARNING>("Failure to restore. Skipping.");
+            continue;
+          }
+#endif
+          if (i->second->isScalar())
+            // Partial is only valid for 'scalar' types.
+            i->second->loadFrom(g_scalar, partial);
+          else {
+            auto g = fg.openGroup(i->first);
+            i->second->loadFrom(g);
+          }
+          triggerPostRestore(i->first);
+        }
+        // Next round: whatever was registered during this one.
+        currentMap = toProcess;
+        toProcess.clear();
+      } while (currentMap.size() > 0);
+
+      // Clear up all pending
+      if (postLoad.size() > 0) {
+        cons.print<LOG_ERROR>("Some post-restore triggers were not executed.");
+        MPI_Communication::instance()->abort();
+      }
+      loaded.clear();
+      postLoad.clear();
+    }
+  };
+
+  /** @example example_markov_state.cpp
+   *  This is an example of how to use the MarkovState class.
+   */
+
+}; // namespace LibLSS
+
+#endif
--- a/libLSS/mcmc/state_element.cpp
+++ b/libLSS/mcmc/state_element.cpp
@ -0,0 +1,19 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mcmc/state_element.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include "state_element.hpp"
+
+using namespace LibLSS;
+
+// Out-of-line definition of the virtual destructor declared in
+// state_element.hpp; there is nothing to release at this level.
+StateElement::~StateElement() = default;
+
--- a/libLSS/mcmc/state_element.hpp
+++ b/libLSS/mcmc/state_element.hpp
@ -0,0 +1,609 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mcmc/state_element.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef _LIBLSS_STATE_ELT_HPP
+#define _LIBLSS_STATE_ELT_HPP
+
+#include <Eigen/Core>
+#include "libLSS/tools/align_helper.hpp"
+#include "libLSS/mpi/generic_mpi.hpp"
+#include <boost/function.hpp>
+#include <boost/multi_array.hpp>
+#include <boost/format.hpp>
+#include <boost/lexical_cast.hpp>
+#include <string>
+#include <H5Cpp.h>
+#include <iostream>
+#include <CosmoTool/hdf5_array.hpp>
+#include <map>
+#include <functional>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/tools/memusage.hpp"
+#include "libLSS/tools/hdf5_type.hpp"
+#include "libLSS/tools/defer.hpp"
+
+namespace LibLSS {
+
+  /**
+     * @brief Generic Markov Chain State element
+     * This is the base class for other more strange elements
+     * 
+     */
+  class StateElement {
+  protected:
+    std::string name;
+    typedef std::function<void(void *, int, MPI_Datatype)> SyncFunction;
+    typedef std::function<void()> NotifyFunction;
+
+  protected:
+    /**
+         * @brief Construct a new State Element object
+         * 
+         */
+    StateElement() : name("_unknown_") {}
+
+    friend class MarkovState;
+    void checkName() {
+      // Anything but the placeholder set by the constructor is accepted.
+      if (name != "_unknown_")
+        return;
+      std::cerr << "Name of a state element is undefined" << std::endl;
+      abort();
+    }
+
+  public:
+    Defer deferLoad, deferInit;
+
+    /**
+     * @brief Destroy the State Element object
+     * 
+     */
+    virtual ~StateElement();
+
+    /**
+     * @brief Register a functor get notifications when this element is finished being loaded.
+     * @deprecated Use deferLoad directly
+     * 
+     * @param f the functor, must support copy-constructible.
+     * @sa loaded
+     */
+    void subscribeLoaded(NotifyFunction f) { deferLoad.ready(f); }
+
+    /**
+     * @brief Send a message that the element has been loaded.
+     * @deprecated Use deferLoad directly.
+     * @sa subscribeLoaded
+     */
+    void loaded() { deferLoad.submit_ready(); }
+
+    /**
+     * @brief Get the name of this state element. This is used to store it in file.
+     * 
+     * @return const std::string& 
+     */
+    const std::string &getName() const { return name; }
+
+    /**
+     * @brief Check if this element is a scalar.
+     * 
+     * @return true if it is a scalar, i.e. trivially serializable
+     * @return false it it is not, requires a lot more operations to (de)serialize.
+     */
+    virtual bool isScalar() const { return false; }
+
+    bool updated() { return false; }
+
+    /**
+     * @brief Save the element to an HDF5 group, only one core is using the file.
+     *
+     */
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) = 0;
+
+    /**
+     * @brief Save the element to an HDF5 group.
+     * 
+     * @param fg an HDF5 group/file
+     * @param comm an MPI communicator
+     * @param partialSave whether only the partial save is requested (i.e. generate restart file).
+     */
+    virtual void saveTo(
+        H5_CommonFileGroup &fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      // Wrap the reference in an optional and defer to the optional based
+      // overload, which the concrete elements implement.
+      typedef boost::optional<H5_CommonFileGroup &> OptGroup;
+      OptGroup wrapped = fg;
+      saveTo(wrapped, comm, partialSave);
+    }
+
+    virtual void saveTo2(
+        std::shared_ptr<H5_CommonFileGroup> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      // A null shared pointer is passed on as an empty optional; otherwise
+      // the optional refers to the pointed group.
+      typedef boost::optional<H5_CommonFileGroup &> OptGroup;
+      OptGroup wrapped;
+      if (fg) {
+        wrapped = *fg;
+      }
+      saveTo(wrapped, comm, partialSave);
+    }
+
+    /**
+     * @brief 
+     * 
+     * @param fg 
+     * @param partialLoad 
+     */
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialLoad = true) = 0;
+    virtual void syncData(SyncFunction f) = 0;
+  };
+
+  /* Generic array template class for Markov Chain state element. It supports all scalars
+    * and complex derived types.
+    */
+  template <class AType, bool NeedReassembly = false>
+  class GenericArrayStateElement : public StateElement {
+  public:
+    enum { Reassembly = NeedReassembly };
+    typedef AType ArrayType;
+    typedef typename ArrayType::element element;
+    typedef typename ArrayType::index_gen index_gen;
+    std::vector<hsize_t> real_dims;
+    std::shared_ptr<ArrayType> array;
+    bool realDimSet;
+    bool resetOnSave;
+    element reset_value;
+    bool auto_resize;
+
+    bool requireReassembly() const { return (bool)Reassembly == true; }
+    void setResetOnSave(const element &_reset_value) {
+      this->reset_value = _reset_value;
+      resetOnSave = true;
+    }
+    void setAutoResize(bool do_resize) { auto_resize = do_resize; }
+
+    template <typename ExtentDim>
+    void setRealDims(const ExtentDim &d) {
+      Console::instance().c_assert(
+          d.size() == real_dims.size(), "Invalid dimension size");
+      std::copy(d.begin(), d.end(), real_dims.begin());
+      realDimSet = true;
+    }
+
+    GenericArrayStateElement()
+        : StateElement(), real_dims(ArrayType::dimensionality),
+          realDimSet(false), resetOnSave(false), auto_resize(false) {}
+    virtual ~GenericArrayStateElement() {}
+
+    virtual bool isScalar() const { return true; }
+
+    /**
+     * @brief Save the array into an HDF5 group.
+     *
+     * When the element needs no reassembly, or when partialSave is true, the
+     * local array is written as is (by every caller if partialSave is true,
+     * only by rank 0 of comm otherwise). In the remaining case rank 0 writes
+     * its own part, then receives the part of every other rank and writes it
+     * into the same dataset of dimensions real_dims.
+     * If a reset value was set, the array is filled with it afterwards.
+     * HDF5 exceptions are converted to ErrorIO.
+     *
+     * @param fg          HDF5 group/file; must be set on the processes that write
+     * @param comm        MPI communicator, mandatory for the reassembly
+     * @param partialSave true to write the local array without reassembly
+     */
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      checkName();
+      try {
+        if (!requireReassembly() || partialSave) {
+          ConsoleContext<LOG_DEBUG> ctx("saveTo(): saving variable " + name);
+          if (partialSave || (comm != 0 && comm->rank() == 0)) {
+            ctx.print("partialSave or rank==0");
+            if (!fg) {
+              error_helper<ErrorBadState>(
+                  "saveTo() requires a valid HDF5 handle on this core.");
+            }
+            CosmoTool::hdf5_write_array(*fg, name, *array);
+          } else {
+            ctx.print("Non-root rank and not partial save. Just passthrough.");
+          }
+        } else {
+          CosmoTool::get_hdf5_data_type<element> HT;
+          Console::instance().c_assert(
+              comm != 0, "Array need reassembly and no communicator given");
+          Console::instance().c_assert(
+              realDimSet,
+              "Real dimensions of the array over communicator is not set for " +
+                  this->getName());
+          std::vector<hsize_t> remote_bases(ArrayType::dimensionality);
+          std::vector<hsize_t> remote_dims(ArrayType::dimensionality);
+          MPI_Datatype dt = translateMPIType<hsize_t>();
+          MPI_Datatype et = translateMPIType<element>();
+
+          ConsoleContext<LOG_DEBUG> ctx("reassembling of variable " + name);
+
+          if (comm->rank() == 0) {
+            if (!fg)
+              error_helper<ErrorBadState>(
+                  "saveTo() requires a valid HDF5 handle on this core.");
+
+            ctx.print("Writing rank 0 data first. Dimensions = ");
+            for (size_t n = 0; n < real_dims.size(); n++)
+              ctx.print(boost::lexical_cast<std::string>(real_dims[n]));
+            CosmoTool::hdf5_write_array(
+                *fg, name, *array, HT.type(), real_dims, true, true);
+
+            ctx.print("Grabbing other rank data");
+            for (int r = 1; r < comm->size(); r++) {
+              ArrayType a;
+
+              // Message tags: 0 = extents, 1 = index bases, 2 = payload.
+              ctx.print(boost::format("Incoming data from rank %d") % r);
+              comm->recv(
+                  remote_dims.data(), ArrayType::dimensionality, dt, r, 0);
+              comm->recv(
+                  remote_bases.data(), ArrayType::dimensionality, dt, r, 1);
+              a.resize(
+                  CosmoTool::hdf5_extent_gen<ArrayType::dimensionality>::build(
+                      remote_dims.data()));
+              a.reindex(remote_bases);
+              comm->recv(a.data(), a.num_elements(), et, r, 2);
+              CosmoTool::hdf5_write_array(
+                  *fg, name, a, HT.type(), real_dims, false, true);
+            }
+          } else {
+            ctx.print("Sending data");
+            // shape() and index_bases() point to size_type and index values.
+            // Copy them into hsize_t buffers so that the memory handed to
+            // send() really has the layout announced by the hsize_t datatype
+            // `dt`, which is what rank 0 receives into.
+            std::vector<hsize_t> local_dims(
+                array->shape(), array->shape() + ArrayType::dimensionality);
+            std::vector<hsize_t> local_bases(
+                array->index_bases(),
+                array->index_bases() + ArrayType::dimensionality);
+            comm->send(local_dims.data(), ArrayType::dimensionality, dt, 0, 0);
+            comm->send(
+                local_bases.data(), ArrayType::dimensionality, dt, 0, 1);
+            comm->send(array->data(), array->num_elements(), et, 0, 2);
+          }
+        }
+        if (resetOnSave)
+          fill(reset_value);
+      } catch (const H5::Exception &e) {
+        error_helper<ErrorIO>(e.getDetailMsg());
+      }
+    }
+
+    /**
+     * @brief Load the array from an HDF5 group.
+     *
+     * Invalid dimensions are reported as ErrorBadState, and failures to open
+     * the group or dataset as ErrorIO. Subscribers of deferLoad are notified
+     * at the end through loaded().
+     *
+     * NOTE(review): the default value of partialLoad differs from the one
+     * declared in StateElement::loadFrom; default arguments are picked from
+     * the static type used for the call.
+     *
+     * @param fg          HDF5 group/file holding a dataset named after this element
+     * @param partialLoad when true and the element requires reassembly, the
+     *                    second reading mode of hdf5_read_array is used
+     */
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialLoad = false) {
+      checkName();
+      try {
+        if (!requireReassembly() || !partialLoad) {
+          ConsoleContext<LOG_DEBUG> ctx("loadFrom full");
+          ctx.print(
+              boost::format("loadFrom(reassembly=%d,partialLoad=%d,autoresize=%"
+                            "d): loading variable %s") %
+              requireReassembly() % partialLoad % auto_resize % name);
+          ctx.print("partialSave or rank==0");
+          CosmoTool::hdf5_read_array(fg, name, *array, auto_resize);
+        } else {
+          Console::instance().c_assert(
+              realDimSet,
+              "Real dimensions of the array over communicator is not set for " +
+                  this->getName());
+
+          ConsoleContext<LOG_DEBUG> ctx("dissassembling of variable " + name);
+          CosmoTool::hdf5_read_array(fg, name, *array, false, true);
+        }
+      } catch (const CosmoTool::InvalidDimensions &) {
+        error_helper<ErrorBadState>(
+            boost::format("Incompatible array size loading '%s'") % getName());
+      } catch (const H5::GroupIException &) {
+        error_helper<ErrorIO>(
+            "Could not open variable " + getName() + " in state file");
+      } catch (const H5::DataSetIException &) {
+        error_helper<ErrorIO>(
+            "Could not open variable " + getName() + " in state file");
+      }
+      loaded();
+    }
+
+    /**
+     * @brief Hand the whole array over to a synchronization function.
+     *
+     * @param f function called with the data pointer, the number of elements
+     *          and the MPI datatype of one element
+     */
+    virtual void syncData(SyncFunction f) {
+      f(array->data(), array->num_elements(),
+        translateMPIType<typename AType::element>());
+    }
+
+    virtual void fill(const element &v) {
+      // Assign v to every element of the underlying storage.
+      element *const dest = array->data();
+      const size_t count = array->num_elements();
+//#pragma omp simd
+#pragma omp parallel for
+      for (size_t idx = 0; idx < count; idx++)
+        dest[idx] = v;
+    }
+  };
+
+  /* Array state element whose storage is a boost::multi_array built from the
+   * given extents, storage order and allocator.
+   */
+  template <
+      typename T, std::size_t DIMENSIONS,
+      typename Allocator = LibLSS::track_allocator<T>,
+      bool NeedReassembly = false>
+  class ArrayStateElement
+      : public GenericArrayStateElement<
+            boost::multi_array<T, DIMENSIONS, Allocator>, NeedReassembly> {
+    typedef GenericArrayStateElement<
+        boost::multi_array<T, DIMENSIONS, Allocator>, NeedReassembly>
+        super_type;
+
+  public:
+    typedef typename super_type::ArrayType ArrayType;
+    typedef typename boost::multi_array_ref<T, DIMENSIONS> RefArrayType;
+    typedef typename super_type::index_gen index_gen;
+
+    // Alignment flag derived from the allocator, used for the Eigen map.
+    enum { AlignState = DetectAlignment<Allocator>::Align };
+    typedef Eigen::Array<T, Eigen::Dynamic, 1> E_Array;
+    typedef Eigen::Map<E_Array, AlignState> MapArray;
+
+    /* Allocate the array and log whether its storage is flagged as aligned. */
+    template <typename ExtentList>
+    ArrayStateElement(
+        const ExtentList &extents, const Allocator &allocator = Allocator(),
+        const boost::general_storage_order<DIMENSIONS> &ordering =
+            boost::c_storage_order())
+        : super_type() {
+      this->array = std::make_shared<ArrayType>(extents, ordering, allocator);
+
+      bool const is_aligned = ((int)AlignState == (int)Eigen::Aligned);
+      Console::instance().print<LOG_DEBUG>(
+          std::string("Creating array which is ") +
+          (is_aligned ? "ALIGNED" : "UNALIGNED"));
+    }
+
+    /* One dimensional Eigen view over all the elements of the array. */
+    MapArray eigen() {
+      T *const first = this->array->data();
+      return MapArray(first, this->array->num_elements());
+    }
+
+    /* Fill the array with v through the Eigen view. */
+    virtual void fill(const typename super_type::element &v) {
+      MapArray view = eigen();
+      view.fill(v);
+    }
+
+    // This is unsafe. Use it with precaution
+    void unsafeSetName(const std::string &n) { this->name = n; }
+  };
+
+  /* Array state element that wraps memory it does not allocate itself, viewed
+   * through a boost::multi_array_ref.
+   */
+  template <typename T, std::size_t DIMENSIONS>
+  class RefArrayStateElement
+      : public GenericArrayStateElement<boost::multi_array_ref<T, DIMENSIONS>> {
+    typedef GenericArrayStateElement<boost::multi_array_ref<T, DIMENSIONS>>
+        super_type;
+
+  public:
+    typedef boost::multi_array_ref<T, DIMENSIONS> ArrayType;
+    typedef boost::multi_array_ref<T, DIMENSIONS> RefArrayType;
+
+    /* Build the view over `data` with the given extents and storage order.
+     *
+     * Only a direct base may appear in the initializer list, so the direct
+     * base is named here; it constructs StateElement itself. The requested
+     * storage order is passed on to the view instead of being dropped.
+     */
+    template <typename ExtentList>
+    RefArrayStateElement(
+        T *data, const ExtentList &extents,
+        const boost::general_storage_order<DIMENSIONS> &ordering =
+            boost::c_storage_order())
+        : super_type() {
+      this->array = std::make_shared<ArrayType>(data, extents, ordering);
+    }
+  };
+
+  /* Helper moving a single value of type U to/from an HDF5 dataset. The value
+   * is passed by address; the HDF5 type is taken from dt.type().
+   */
+  template <typename U>
+  struct _scalar_writer {
+    template <typename DT>
+    static inline void write(H5::DataSet &dataset, U &v, DT dt) {
+      U *const source = &v;
+      dataset.write(source, dt.type());
+    }
+
+    template <typename DT>
+    static inline void read(H5::DataSet &dataset, U &v, DT dt) {
+      U *const target = &v;
+      dataset.read(target, dt.type());
+    }
+  };
+
+  /* Specialization for std::string: the string object itself is handed to the
+   * dataset read/write calls instead of its address.
+   */
+  template <>
+  struct _scalar_writer<std::string> {
+    template <typename DT>
+    static inline void write(H5::DataSet &dataset, std::string &v, DT dt) {
+      std::string &text = v;
+      dataset.write(text, dt.type());
+    }
+
+    template <typename DT>
+    static inline void read(H5::DataSet &dataset, std::string &v, DT dt) {
+      std::string &text = v;
+      dataset.read(text, dt.type());
+    }
+  };
+
+  /* Generic scalar Markov State element. */
+  template <typename T>
+  class ScalarStateElement : public StateElement {
+  public:
+    T value;
+    T reset_value;
+    bool resetOnSave;
+    bool doNotRestore;
+
+    ScalarStateElement()
+        : StateElement(), value(), reset_value(), resetOnSave(false),
+          doNotRestore(false) {}
+    virtual ~ScalarStateElement() {}
+
+    void setDoNotRestore(bool doNotRestore) {
+      this->doNotRestore = doNotRestore;
+    }
+    void setResetOnSave(const T &_reset_value) {
+      this->reset_value = _reset_value;
+      resetOnSave = true;
+    }
+
+    /**
+     * @brief Save the scalar as a one element dataset named after the element.
+     *
+     * The dataset is written when partialSave is true, or by rank 0 of comm
+     * otherwise; nothing is done on the other processes. On the processes
+     * that write, the value is replaced by the reset value afterwards if one
+     * was set with setResetOnSave().
+     *
+     * @param fg          HDF5 group/file; must be set on the processes that write
+     * @param comm        MPI communicator, may be null
+     * @param partialSave true to write regardless of the rank
+     */
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      CosmoTool::get_hdf5_data_type<T> hdf_data_type;
+      std::vector<hsize_t> dimensions(1);
+      dimensions[0] = 1;
+
+      if (partialSave || (comm != 0 && comm->rank() == 0)) {
+        checkName();
+        // Dereferencing an empty optional is undefined behaviour: report the
+        // missing handle the same way GenericArrayStateElement::saveTo does.
+        if (!fg) {
+          error_helper<ErrorBadState>(
+              "saveTo() requires a valid HDF5 handle on this core.");
+        }
+        H5::DataSpace dataspace(1, dimensions.data());
+        H5::DataSet dataset =
+            (*fg).createDataSet(name, hdf_data_type.type(), dataspace);
+
+        _scalar_writer<T>::write(dataset, value, hdf_data_type);
+        if (resetOnSave)
+          value = reset_value;
+      }
+    }
+
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialLoad = true) {
+      CosmoTool::get_hdf5_data_type<T> hdf_data_type;
+      std::vector<hsize_t> dimensions(1);
+      H5::DataSet dataset;
+
+      if (doNotRestore) {
+        return;
+      }
+
+      dimensions[0] = 1;
+
+      checkName();
+      try {
+        dataset = fg.openDataSet(name);
+      } catch (const H5::GroupIException &) {
+        error_helper<ErrorIO>(
+            "Could not find variable " + name + " in state file.");
+      }
+      H5::DataSpace dataspace = dataset.getSpace();
+      hsize_t n;
+
+      if (dataspace.getSimpleExtentNdims() != 1)
+        error_helper<ErrorIO>("Invalid stored dimension for " + getName());
+
+      dataspace.getSimpleExtentDims(&n);
+      if (n != 1)
+        error_helper<ErrorIO>("Invalid stored dimension for " + getName());
+
+      _scalar_writer<T>::read(dataset, value, hdf_data_type);
+      loaded();
+    }
+
+    operator T() { return value; }
+
+    virtual bool isScalar() const { return true; }
+
+    virtual void syncData(SyncFunction f) {
+      error_helper<ErrorBadState>(
+          "MPI synchronization not supported by this type");
+    }
+  };
+
+  /**
+   * Scalar state element that supports MPI synchronization: the provided
+   * function is applied to the address of the value, with a count of one and
+   * the MPI datatype obtained from translateMPIType<T>().
+   */
+  template <typename T>
+  class SyncableScalarStateElement : public ScalarStateElement<T> {
+  public:
+    typedef typename ScalarStateElement<T>::SyncFunction SyncFunction;
+
+    /// Apply `f` to the single stored value.
+    virtual void syncData(SyncFunction f) {
+      f(&this->value, 1, translateMPIType<T>());
+    }
+  };
+
+  /**
+   * State element sharing ownership of an object of type T.
+   *
+   * Persistence is delegated to the object itself: saveTo() calls
+   * `obj->save()` and loadFrom() calls `obj->restore()`. syncData() does
+   * nothing.
+   */
+  template <typename T>
+  class SharedObjectStateElement : public StateElement {
+  public:
+    // Shared handle on the wrapped object. It is empty after default
+    // construction, and every accessor below dereferences it unchecked.
+    std::shared_ptr<T> obj;
+
+    SharedObjectStateElement() : StateElement() {}
+    SharedObjectStateElement(std::shared_ptr<T> &src)
+        : StateElement(), obj(src) {}
+    // `src` is a named variable here, so this copies the handle.
+    SharedObjectStateElement(std::shared_ptr<T> &&src)
+        : StateElement(), obj(src) {}
+    virtual ~SharedObjectStateElement() {}
+
+    /// Save the object into `fg`; nothing happens when `fg` is empty.
+    /// `comm` and `partialSave` are not used.
+    virtual void saveTo(
+        boost::optional<CosmoTool::H5_CommonFileGroup &> fg,
+        MPI_Communication *comm = 0, bool partialSave = true) {
+      if (fg)
+        obj->save(*fg);
+    }
+
+    /// Restore the object from `fg`, then call loaded(). `partialSave` is
+    /// not used.
+    virtual void
+    loadFrom(CosmoTool::H5_CommonFileGroup &fg, bool partialSave = true) {
+      obj->restore(fg);
+      loaded();
+    }
+
+    /// Implicit conversion to a reference on the wrapped object.
+    operator T &() { return *obj; }
+
+    T &get() { return *obj; }
+    const T &get() const { return *obj; }
+
+    /// No MPI synchronization is performed for this element.
+    virtual void syncData(SyncFunction f) {}
+  };
+
+  /**
+   * State element wrapping a raw pointer to an object of type T.
+   *
+   * Persistence is delegated to the object (`save()` / `restore()`).
+   *
+   * @tparam T         type of the wrapped object
+   * @tparam autofree  when true the destructor deletes the wrapped object
+   */
+  template <typename T, bool autofree>
+  class ObjectStateElement : public StateElement {
+  public:
+    // Wrapped object; null until one is assigned.
+    T *obj;
+
+    // The pointer must start out null: with `autofree` enabled the destructor
+    // deletes it, and deleting an indeterminate pointer is undefined
+    // behaviour whereas deleting a null pointer is a no-op.
+    ObjectStateElement() : StateElement(), obj(nullptr) {}
+    ObjectStateElement(T *o) : StateElement(), obj(o) {}
+    virtual ~ObjectStateElement() {
+      if (autofree)
+        delete obj;
+    }
+
+    /// Save the object into `fg`; nothing happens when `fg` is empty.
+    /// `comm` and `partialSave` are not used.
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      if (fg)
+        obj->save(*fg);
+    }
+
+    /// Restore the object from `fg`, then call loaded(). `partialSave` is
+    /// not used.
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialSave = true) {
+      obj->restore(fg);
+      loaded();
+    }
+
+    /// Implicit conversion to a reference on the wrapped object.
+    operator T &() { return *obj; }
+
+    T &get() { return *obj; }
+    const T &get() const { return *obj; }
+
+    /// No MPI synchronization is performed for this element.
+    virtual void syncData(SyncFunction f) {}
+  };
+
+  /**
+   * State element holding an object by value that is never persisted:
+   * saveTo(), loadFrom() and syncData() are all no-ops.
+   */
+  template <class T>
+  class TemporaryElement : public StateElement {
+  protected:
+    T obj;
+
+  public:
+    /// Store a copy of `a`.
+    TemporaryElement(T const &a) : obj(a) {}
+    /// Take over the content of the temporary `a`.
+    // A named rvalue reference is an lvalue: without the cast the value
+    // would be copied, which also rejects move-only types. The cast is what
+    // std::move does, spelled out so that no extra header is required.
+    TemporaryElement(T &&a) : obj(static_cast<T &&>(a)) {}
+
+    /// Implicit conversion to a reference on the stored object.
+    operator T &() { return obj; }
+
+    T &get() { return obj; }
+    const T &get() const { return obj; }
+
+    /// Temporary elements are not written to disk.
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {}
+
+    /// Temporary elements are not read from disk.
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialSave = true) {}
+
+    /// No MPI synchronization is performed for this element.
+    virtual void syncData(SyncFunction f) {}
+  };
+
+  /**
+   * State element wrapping a random number generator of type T.
+   *
+   * The generator is kept behind a shared pointer. Saving and restoring are
+   * delegated to the generator's own `save()` / `restore()` methods.
+   */
+  template <class T>
+  class RandomStateElement : public StateElement {
+  protected:
+    std::shared_ptr<T> rng;
+
+  public:
+    /**
+     * Wrap a raw generator pointer.
+     *
+     * @param generator  generator to wrap
+     * @param handover_  when true the generator is deleted together with the
+     *                   last shared handle; when false it is never deleted
+     *                   by this element
+     */
+    RandomStateElement(T *generator, bool handover_ = false) {
+      if (!handover_) {
+        // Non-owning handle: the deleter intentionally does nothing.
+        rng = std::shared_ptr<T>(generator, [](T *) {});
+        return;
+      }
+      rng = std::shared_ptr<T>(generator, [](T *owned) { delete owned; });
+    }
+
+    /// Share an already managed generator.
+    RandomStateElement(std::shared_ptr<T> generator) : rng(generator) {}
+
+    virtual ~RandomStateElement() {}
+
+    T &get() { return *rng; }
+    const T &get() const { return *rng; }
+
+    /// Save the generator state into `fg`; nothing happens when `fg` is
+    /// empty. `comm` and `partialSave` are not used.
+    virtual void saveTo(
+        boost::optional<H5_CommonFileGroup &> fg, MPI_Communication *comm = 0,
+        bool partialSave = true) {
+      if (!fg)
+        return;
+      rng->save(*fg);
+    }
+
+    /// Restore the generator state from `fg`, forwarding `partialLoad`, then
+    /// call loaded().
+    virtual void loadFrom(H5_CommonFileGroup &fg, bool partialLoad = false) {
+      rng->restore(fg, partialLoad);
+      loaded();
+    }
+
+    /// No MPI synchronization is performed for this element.
+    virtual void syncData(SyncFunction f) {}
+  };
+}; // namespace LibLSS
+
+#endif
--- a/libLSS/mcmc/state_sync.hpp
+++ b/libLSS/mcmc/state_sync.hpp
@ -0,0 +1,88 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mcmc/state_sync.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_STATE_ELEMENT_SYNC_HPP
+#define __LIBLSS_STATE_ELEMENT_SYNC_HPP
+
+#include <functional>
+#include <list>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/mpi/generic_mpi.hpp"
+
+namespace LibLSS {
+
+    class StateElement;
+    
+    /**
+     * Helper class to synchronize many StateElement variables at the same
+     * time with MPI.
+     *
+     * The bundle stores raw pointers to the elements; it does not own them.
+     *
+     * @deprecated
+     */
+    class MPI_SyncBundle {
+    protected:
+        typedef std::list<StateElement *> List;
+
+        List list;
+    public:
+        /// Constructor
+        MPI_SyncBundle() {}
+        ~MPI_SyncBundle() {}
+
+        /**
+         * Add a specified element to the bundle.
+         * @param e the element to be added
+         * @return the bundle itself, so that additions can be chained
+         */
+        MPI_SyncBundle& operator+=(StateElement *e) {
+            list.push_back(e);
+            return *this;
+        }
+
+        /**
+         * Execute the provided synchronization function on all elements
+         * of the bundle, in insertion order.
+         * @param f the Functor to be executed.
+         */
+        template<typename Function>
+        void syncData(Function f) {
+            ConsoleContext<LOG_DEBUG> ctx("sync bundle");
+            for (StateElement *element : list)
+                element->syncData(f);
+        }
+
+        /**
+         * Execute a broadcast operation on the bundle.
+         * @param comm the MPI communicator.
+         * @param root the root for the broadcast operation (default is 0).
+         */
+        void mpiBroadcast(MPI_Communication& comm, int root = 0) {
+            namespace ph = std::placeholders;
+            // Bind the address of the communicator: std::bind stores copies
+            // of its arguments, and a copied communicator wrapper would run
+            // its destructor (which may release the MPI communicator) while
+            // the original is still in use.
+            syncData(std::bind(&MPI_Communication::broadcast, &comm, ph::_1, ph::_2, ph::_3, root));
+        }
+
+        /**
+         * Execute an in-place all-reduce (max) operation on the bundle.
+         * @param comm the MPI communicator.
+         */
+        void mpiAllMax(MPI_Communication& comm) {
+            namespace ph = std::placeholders;
+            // See mpiBroadcast: the communicator is bound by address.
+            syncData(std::bind(&MPI_Communication::all_reduce, &comm, MPI_IN_PLACE, ph::_1, ph::_2, ph::_3, MPI_MAX));
+        }
+
+        /**
+         * Execute an in-place all-reduce (sum) operation on the bundle.
+         * @param comm the MPI communicator.
+         */
+        void mpiAllSum(MPI_Communication& comm) {
+            namespace ph = std::placeholders;
+            // See mpiBroadcast: the communicator is bound by address.
+            syncData(std::bind(&MPI_Communication::all_reduce, &comm, MPI_IN_PLACE, ph::_1, ph::_2, ph::_3, MPI_SUM));
+        }
+
+    };
+    
+};
+
+#endif
--- a/libLSS/mpi/fake_mpi/mpi_communication.cpp
+++ b/libLSS/mpi/fake_mpi/mpi_communication.cpp
@ -0,0 +1,13 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/fake_mpi/mpi_communication.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "mpi_type_translator.hpp"
+#include "mpi_communication.hpp"
+
+// Storage for the process-wide communicator pointer of the fake backend.
+// It stays null until one of the setupMPI() overloads assigns it.
+LibLSS::MPI_Communication *LibLSS::MPI_Communication::singleton = 0;
--- a/libLSS/mpi/fake_mpi/mpi_communication.hpp
+++ b/libLSS/mpi/fake_mpi/mpi_communication.hpp
@ -0,0 +1,394 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/fake_mpi/mpi_communication.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+
+#ifndef __CMB_FAKE_MPI_COMMUNICATION_HPP
+#define __CMB_FAKE_MPI_COMMUNICATION_HPP
+
+#include <string>
+#include <exception>
+#include <cstdlib>
+#include <cstring>
+#include <boost/multi_array.hpp>
+
+// Placeholder for the MPI communicator handle when no MPI library is used.
+// The fake MPI_Communication::comm() always returns 0 for it.
+typedef void *MPI_Comm;
+
+namespace LibLSS {
+  // Minimal status record of the fake backend: only an error field.
+  typedef struct {
+    int MPI_ERROR;
+  } MPI_Status;
+  // Reduction operations are plain integer tags in the fake backend.
+  typedef int MPI_Op;
+
+  // In-place marker, represented by a null pointer. reduce() and
+  // all_reduce() skip their copy when the send buffer equals this value.
+  static const void *MPI_IN_PLACE = (const void *)0;
+  // Sentinel address used as default "no status requested" argument; nothing
+  // in this header dereferences it.
+  static MPI_Status *const MPI_STATUS_IGNORE = (MPI_Status *)1;
+  static const int MPI_SUCCESS = 0;
+  // Operation tags. NOTE(review): the fake reduce()/all_reduce() in this
+  // header never look at the operation, so the numbering seems to only need
+  // distinct values -- confirm no other code depends on the actual numbers.
+  static const int MPI_SUM = 0;
+  static const int MPI_MIN = 1;
+  static const int MPI_MAX = 2;
+  static const int MPI_LAND = 3; //FIXME can I assign any number?
+
+  /**
+   * Exception raised by the fake MPI backend.
+   *
+   * There is no MPI library to ask for an error text, so the message only
+   * states that the error comes from the fake backend and gives its code.
+   */
+  class MPI_Exception : public std::exception {
+  public:
+    /// @param err  error code reported by code() and embedded in what()
+    MPI_Exception(int err)
+        : err_string("Fake MPI error (code " + std::to_string(err) + ")"),
+          errcode(err) {}
+
+    /// Human readable description; never empty.
+    virtual const char *what() const throw() { return err_string.c_str(); }
+    /// Error code given at construction.
+    int code() const { return errcode; }
+
+    virtual ~MPI_Exception() throw() {}
+
+  private:
+    std::string err_string;
+    int errcode;
+  };
+
+  /**
+   * Request handle of the fake backend. Nothing is ever pending: the
+   * request is never active, test() always succeeds, and wait()/free()
+   * return immediately.
+   */
+  class MPICC_Request {
+  public:
+    MPICC_Request() {}
+
+    /// Always reports completion; `status` is left untouched.
+    bool test(MPI_Status *status = MPI_STATUS_IGNORE) { return true; }
+
+    /// A fake request is never active.
+    bool is_active() const { return false; }
+
+    /// Nothing to release.
+    void free() {}
+
+    /// Returns immediately; `status` is left untouched.
+    void wait(MPI_Status *status = MPI_STATUS_IGNORE) {}
+  };
+
+  // One-dimensional arrays of requests and statuses, accepted by
+  // MPI_Communication::WaitAll.
+  typedef boost::multi_array<MPICC_Request, 1> RequestArray;
+  typedef boost::multi_array<MPI_Status, 1> StatusArray;
+
+  /**
+   * Memory window of the fake backend: a plain byte buffer reachable through
+   * `w`. Locking and fencing do nothing; put/get index the buffer as an
+   * array of T.
+   */
+  class MPICC_Window {
+  public:
+    // Start of the buffer. MPI_Communication::win_create() fills it with a
+    // `new char[]` allocation, which destroy() releases.
+    void *w;
+
+    void lock(bool) {}
+    void unlock() {}
+    void fence() {}
+
+    /// Release the buffer as an array of char.
+    void destroy() {
+      char *const bytes = static_cast<char *>(w);
+      delete[] bytes;
+    }
+
+    /// Store `v` in slot `r` of the buffer viewed as an array of T.
+    template <typename T>
+    void put(int r, T v) {
+      T *const slots = static_cast<T *>(w);
+      slots[r] = v;
+    }
+
+    /// Return slot `r` of the buffer viewed as an array of T.
+    template <typename T>
+    T get(int r) {
+      T *const slots = static_cast<T *>(w);
+      return slots[r];
+    }
+
+    /// Start of the buffer viewed as an array of T.
+    template <typename T>
+    T *get_ptr() {
+      return static_cast<T *>(w);
+    }
+
+    /// Start of the buffer viewed as a read-only array of T.
+    template <typename T>
+    const T *get_ptr() const {
+      return static_cast<const T *>(w);
+    }
+  };
+
+  /// Mutex of the fake backend: acquiring and releasing do nothing.
+  class MPICC_Mutex {
+  public:
+    void acquire() {}
+    void release() {}
+  };
+
+  /**
+   * Single-process stand-in for the MPI communicator wrapper.
+   *
+   * Every query reports a single task of rank 0. Blocking collectives
+   * degrade to a local copy (or to nothing), non-blocking collectives return
+   * an inactive request, and plain send/recv throw MPI_Exception.
+   *
+   * In this backend an MPI_Datatype value is used as the size in bytes of
+   * one element: copy lengths are computed as `count * datatype`.
+   */
+  class MPI_Communication {
+  private:
+    friend MPI_Communication *setupMPI(int &argc, char **&argv);
+    friend MPI_Communication *setupMPI(MPI_Comm w);
+
+    // Process-wide instance, assigned by setupMPI().
+    static MPI_Communication *singleton;
+
+  public:
+    typedef MPICC_Request Request;
+
+    MPI_Communication() {}
+
+    MPI_Communication(void*) {}
+
+    ~MPI_Communication() {}
+
+    /// Pointer assigned by setupMPI(), null before that.
+    static MPI_Communication *instance() { return singleton; }
+
+    /// Returns a newly allocated communicator; both arguments are ignored.
+    MPI_Communication *split(int color = 0, int key = 0) {
+      return new MPI_Communication();
+    }
+
+    /// Always 0.
+    int rank() const { return 0; }
+
+    /// Always 1.
+    int size() const { return 1; }
+
+    /// Always the null handle.
+    MPI_Comm comm() { return 0; }
+
+    /// Terminates the process through ::abort().
+    void abort() { ::abort(); }
+
+    /// Returns a newly allocated no-op mutex; `tag` is ignored.
+    MPICC_Mutex *new_mutex(int tag) { return new MPICC_Mutex(); }
+
+    /// Allocates a window backed by `size` bytes; `disp_unit` is ignored.
+    MPICC_Window win_create(int size, int disp_unit) {
+      MPICC_Window w;
+
+      w.w = new char[size];
+      return w;
+    }
+
+    /**
+     * Combined send/receive with oneself: copies `sendcount * sdatatype`
+     * bytes from `sendbuf` to `recvbuf`.
+     *
+     * @throws MPI_Exception if source or destination is not rank 0, or if
+     *         the counts or the tags differ.
+     */
+    void send_recv(
+        const void *sendbuf, int sendcount, MPI_Datatype sdatatype, int dest,
+        int sendtag, void *recvbuf, int recvcount, MPI_Datatype rdatatype,
+        int source, int recvtag, MPI_Status *s = 0) {
+      if (source != 0 || dest != 0 || sendcount != recvcount ||
+          recvtag != sendtag)
+        throw MPI_Exception(0);
+      ::memcpy(recvbuf, sendbuf, sendcount * sdatatype);
+    }
+
+    /// Always throws MPI_Exception: there is no peer to send to.
+    void
+    send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag) {
+      throw MPI_Exception(0);
+    }
+
+    /// Always throws MPI_Exception: there is no peer to receive from.
+    void recv(
+        void *buf, int count, MPI_Datatype datatype, int from, int tag,
+        MPI_Status *status = 0) {
+      throw MPI_Exception(0);
+    }
+
+    /// Copies the send buffer to the receive buffer unless the reduction is
+    /// in place; `op` and `root` are ignored.
+    void reduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root) {
+      if (sendbuf != MPI_IN_PLACE)
+        ::memcpy(recvbuf, sendbuf, count * datatype);
+    }
+
+    /// Forwards to recv(), hence always throws.
+    Request
+    Irecv(void *buf, int count, MPI_Datatype datatype, int from, int tag) {
+      Request req;
+
+      recv(buf, count, datatype, from, tag);
+      return req;
+    }
+
+    /// Forwards to send(), hence always throws.
+    Request
+    Isend(void *buf, int count, MPI_Datatype datatype, int to, int tag) {
+      Request req;
+
+      send(buf, count, datatype, to, tag);
+      return req;
+    }
+
+    /// Returns an inactive request. NOTE(review): no data is copied from
+    /// `sendbuf` to `buf` -- confirm callers only gather in place.
+    Request Igather(
+        void const *sendbuf, int sendcount, MPI_Datatype sendtype, void *buf,
+        int recvcount, MPI_Datatype recvtype, int root) {
+      return Request();
+    }
+
+    /// Typed front-end of Irecv().
+    template <typename T>
+    Request IrecvT(T *buf, int count, int from, int tag) {
+      return Irecv(buf, count, translateMPIType<T>(), from, tag);
+    }
+
+    /// Typed front-end of Isend().
+    template <typename T>
+    Request IsendT(T *buf, int count, int from, int tag) {
+      return Isend(buf, count, translateMPIType<T>(), from, tag);
+    }
+
+    /// Typed front-end of Igather().
+    template <typename T>
+    Request
+    IgatherT(T const *sendbuf, int sendcount, T *buf, int recvcount, int root) {
+      return Igather(
+          sendbuf, sendcount, translateMPIType<T>(), buf, recvcount,
+          translateMPIType<T>(), root);
+    }
+
+    /// Nothing can be pending: returns immediately.
+    static void WaitAll(RequestArray &reqs, StatusArray &statuses) {}
+
+    /// Nothing can be pending: returns immediately.
+    static void WaitAll(
+        std::vector<Request> &reqs,
+        std::vector<MPI_Status> &&statuses = std::vector<MPI_Status>()) {}
+
+    /// The only task already holds the data: nothing to do.
+    void broadcast(
+        void *sendrecbuf, int sendrec_count, MPI_Datatype sr_type, int root) {}
+
+    /// Not supported by the fake backend: always throws MPI_Exception.
+    void scatter(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype, int root) {
+      throw MPI_Exception(0);
+    }
+
+    /// Copies `recvcount * recvtype` bytes from `sendbuf` to `recvbuf`.
+    void all2all(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype) {
+      // Same guard as the other collectives: an in-place call (null send
+      // buffer) or a self-copy must not reach memcpy.
+      if (sendbuf != MPI_IN_PLACE && sendbuf != recvbuf)
+        ::memcpy(recvbuf, sendbuf, recvcount * recvtype);
+    }
+
+    /// Typed front-end of all2all().
+    template <typename T>
+    void all2allT(const T *sendbuf, int sendcount, T *recvbuf, int recvcount) {
+      all2all(
+          sendbuf, sendcount, translateMPIType<T>(), recvbuf, recvcount,
+          translateMPIType<T>());
+    }
+
+    /// Copies the send buffer to the receive buffer unless the reduction is
+    /// in place; `op` is ignored.
+    void all_reduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op) {
+      if (sendbuf != MPI_IN_PLACE)
+        ::memcpy(recvbuf, sendbuf, count * datatype);
+    }
+
+    /// Copies `sendcount * sendtype` bytes unless both buffers coincide.
+    void all_gather(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype) {
+      if (sendbuf != recvbuf)
+        memcpy(recvbuf, sendbuf, size_t(sendtype) * size_t(sendcount));
+    }
+
+    /// Typed front-end of reduce().
+    template <typename T>
+    void
+    reduce_t(const void *sendbuf, T *recvbuf, int count, MPI_Op op, int root) {
+      reduce(sendbuf, recvbuf, count, translateMPIType<T>(), op, root);
+    }
+
+    /// Typed front-end of broadcast().
+    template <typename T>
+    void broadcast_t(T *sendrecbuf, int count, int root) {
+      broadcast(sendrecbuf, count, translateMPIType<T>(), root);
+    }
+
+    /// Typed front-end of all_reduce().
+    template <typename T>
+    void all_reduce_t(const void *sendbuf, T *recvbuf, int count, MPI_Op op) {
+      all_reduce(sendbuf, recvbuf, count, translateMPIType<T>(), op);
+    }
+
+    /// Typed front-end of all_gather().
+    template <typename T>
+    void
+    all_gather_t(const T *sendbuf, int sendcount, T *recvbuf, int recvcount) {
+      all_gather(
+          sendbuf, sendcount, translateMPIType<T>(), recvbuf, recvcount,
+          translateMPIType<T>());
+    }
+
+    /// Nothing to wait for.
+    void barrier() {}
+
+    /**
+     * Accumulation onto `root`: with a single task this is a copy of `count`
+     * elements, skipped when both arrays coincide.
+     *
+     * @throws MPI_Exception if `root` is not 0.
+     */
+    template <typename T>
+    void accum(T *target_array, const T *source_array, int count, int root) {
+      if (root != 0)
+        throw MPI_Exception(0);
+
+      if (target_array != source_array)
+        ::memcpy(target_array, source_array, count * sizeof(T));
+    }
+
+    /// In-place accumulation over a single task: nothing to do.
+    template <typename T>
+    void all_accum(T *ts_array, int count) {}
+
+    /// In-place gather over a single task: nothing to do.
+    template <typename T>
+    void all_gather_t(T *recvbuf, int recvcount) {}
+
+    /// Returns an inactive request. NOTE(review): unlike reduce(), no data
+    /// is copied -- confirm callers only reduce in place.
+    Request Ireduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root) {
+      return Request();
+    }
+
+    /// Returns an inactive request. NOTE(review): unlike all_reduce(), no
+    /// data is copied -- confirm callers only reduce in place.
+    Request IallReduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op) {
+      return Request();
+    }
+
+    /// Typed front-end of Ireduce().
+    template <typename T>
+    Request
+    IreduceT(const void *sendbuf, T *recvbuf, int count, MPI_Op op, int root) {
+      return Ireduce(sendbuf, recvbuf, count, translateMPIType<T>(), op, root);
+    }
+
+    /// Typed front-end of IallReduce().
+    template <typename T>
+    Request IallReduceT(const void *sendbuf, T *recvbuf, int count, MPI_Op op) {
+      return IallReduce(sendbuf, recvbuf, count, translateMPIType<T>(), op);
+    }
+
+    /// The only task already holds the data: returns an inactive request.
+    Request
+    Ibroadcast(void *buffer, int count, MPI_Datatype datatype, int root) {
+      return Request();
+    }
+
+    /// Typed front-end of Ibroadcast().
+    template <typename T>
+    Request IbroadcastT(T *buf, int count, int root) {
+      return Ibroadcast(buf, count, translateMPIType<T>(), root);
+    }
+
+    /// Copies `sendcount * sendtype` bytes unless both buffers coincide;
+    /// `recvcounts` and `displs` are ignored.
+    void all_gatherv(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[],
+        MPI_Datatype recvtype) {
+      if (sendbuf != recvbuf)
+        memcpy(recvbuf, sendbuf, size_t(sendtype) * size_t(sendcount));
+    }
+
+    /// Typed front-end of all_gatherv().
+    template <typename T>
+    void all_gatherv_t(
+        const T *sendbuf, int sendcount, T *recvbuf, const int *recvcounts,
+        const int displs[]) {
+      all_gatherv(
+          sendbuf, sendcount, translateMPIType<T>(), recvbuf, recvcounts,
+          displs, translateMPIType<T>());
+    }
+
+    /// In-place variable gather over a single task: nothing to do.
+    template <typename T>
+    void all_gatherv_t(T *recvbuf, const int *recvcounts, const int *displs) {}
+
+    /// Copies `recvcounts[0] * recvtype` bytes from the start of `sendbuf`
+    /// to the start of `recvbuf`; the displacement arrays are not used.
+    void all2allv(
+        const void *sendbuf, const int *sendcounts, const int sdispls[],
+        MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
+        const int rdispls[], MPI_Datatype recvtype) {
+      // Same guard as the other collectives: an in-place call (null send
+      // buffer) or a self-copy must not reach memcpy.
+      if (sendbuf != MPI_IN_PLACE && sendbuf != recvbuf)
+        ::memcpy(recvbuf, sendbuf, recvcounts[0] * recvtype);
+    }
+
+    /// Typed front-end of all2allv().
+    template <typename T>
+    void all2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls, T *recvbuf,
+        const int *recvcounts, const int *rdispls) {
+      all2allv(
+          sendbuf, sendcounts, sdispls, translateMPIType<T>(), recvbuf,
+          recvcounts, rdispls, translateMPIType<T>());
+    }
+
+    /// Returns an inactive request; no data is moved.
+    template <typename T>
+    Request Iall2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls,
+        MPI_Datatype sendtype, T *recvbuf, const int *recvcounts,
+        const int *rdispls, MPI_Datatype recvtype) {
+
+      return Request();
+    }
+
+    /// Convenience overload deducing both datatypes from T.
+    template <typename T>
+    Request Iall2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls, T *recvbuf,
+        const int *recvcounts, const int *rdispls) {
+      // Forward to the overload taking explicit datatypes. The previous
+      // target, `Iall2allv`, is not declared in this class, so any
+      // instantiation of this overload failed to compile.
+      return Iall2allv_t(
+          sendbuf, sendcounts, sdispls, translateMPIType<T>(), recvbuf,
+          recvcounts, rdispls, translateMPIType<T>());
+    }
+  };
+
+  /// Create the fake communicator, register it as the process-wide instance
+  /// and return it. The command line arguments are not inspected.
+  inline MPI_Communication *setupMPI(int &argc, char **&argv) {
+    MPI_Communication *const world = new MPI_Communication();
+    MPI_Communication::singleton = world;
+    return world;
+  }
+
+  /// Create the fake communicator, register it as the process-wide instance
+  /// and return it. The communicator handle `w` is not used.
+  inline MPI_Communication *setupMPI(MPI_Comm w) {
+    MPI_Communication *const world = new MPI_Communication();
+    MPI_Communication::singleton = world;
+    return world;
+  }
+
+  /// Shutdown hook of the fake backend: nothing is done. In particular the
+  /// communicator allocated by setupMPI() is not released here.
+  inline void doneMPI() {}
+}; // namespace LibLSS
+
+#endif
--- a/libLSS/mpi/fake_mpi/mpi_type_translator.hpp
+++ b/libLSS/mpi/fake_mpi/mpi_type_translator.hpp
@ -0,0 +1,76 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/fake_mpi/mpi_type_translator.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+
+#ifndef FAKE_MPI_TYPE_TRANSLATOR_HPP_INCLUDED
+#define FAKE_MPI_TYPE_TRANSLATOR_HPP_INCLUDED
+
+#include <complex>
+
+namespace LibLSS
+{
+  // In the fake backend a datatype is just an integer.
+  typedef int MPI_Datatype;
+  // Named datatype tags. Several names share the same number.
+  // NOTE(review): these are small tags, whereas translateMPIType<T>() below
+  // returns sizeof(T); the fake communicator multiplies element counts by the
+  // datatype value to get byte counts -- confirm the named tags are never
+  // passed to those copy-based calls.
+  static const int MPI_INT = 0;
+  static const int MPI_LONG = 1;
+  static const int MPI_DOUBLE = 2;
+  static const int MPI_LONG_DOUBLE = 3;
+  static const int MPI_INTEGER = 0;
+  static const int MPI_UNSIGNED = 0;
+  static const int MPI_UNSIGNED_LONG = 1;
+
+  // Maps a C++ type to the datatype value used by the fake backend. Only the
+  // declaration is given here; definitions exist solely for the types listed
+  // through the macros below.
+  template<typename T>
+  MPI_Datatype translateMPIType();
+
+  // Defines translateMPIType<T>() as returning sizeof(T).
+#define MPI_FORCE_TYPE(T) \
+  template<> \
+  inline MPI_Datatype translateMPIType<T>() \
+  { \
+    return sizeof(T); \
+  }
+
+  // Same expansion as MPI_FORCE_TYPE. Unlike MPI_FORCE_TYPE this macro is not
+  // undefined at the end of the list, so it stays visible to including files.
+#define MPI_FORCE_COMPOUND_TYPE(T) \
+  template<> \
+  inline MPI_Datatype translateMPIType<T>() \
+  { \
+    return sizeof(T); \
+  }
+
+  // Types supported by translateMPIType().
+  MPI_FORCE_TYPE(int);
+  MPI_FORCE_TYPE(double);
+  MPI_FORCE_TYPE(long double);
+  // NOTE(review): __GNU__ is presumably not the macro defined by GCC
+  // (that would be __GNUC__), in which case this branch is normally skipped
+  // -- confirm which macro was intended.
+#ifdef __GNU__
+  MPI_FORCE_TYPE(__float128);
+#endif
+  MPI_FORCE_TYPE(float);
+  MPI_FORCE_TYPE(long);
+  MPI_FORCE_TYPE(long long);
+  MPI_FORCE_TYPE(unsigned long);
+  MPI_FORCE_TYPE(unsigned long long);
+  MPI_FORCE_TYPE(bool);
+  MPI_FORCE_TYPE(std::complex<float>);
+  MPI_FORCE_TYPE(std::complex<double>);
+
+
+#undef MPI_FORCE_TYPE
+
+  /**
+   * Datatype descriptor for a fixed-size vector of `Dim` values of
+   * `BaseType`. In the fake backend the datatype is the total size in bytes
+   * of such a vector.
+   */
+  template<typename BaseType, size_t Dim>
+  struct mpiVectorType {
+    typedef mpiVectorType<BaseType, Dim> Self;
+
+    mpiVectorType() {}
+
+    /// Shared descriptor instance, created on first use.
+    static Self& instance() {
+      static Self descriptor;
+      return descriptor;
+    }
+
+    /// Size in bytes of `Dim` consecutive `BaseType` values.
+    inline MPI_Datatype type() const {
+      return sizeof(BaseType)*Dim;
+    }
+  };
+};
+
+#endif // FAKE_MPI_TYPE_TRANSLATOR_HPP_INCLUDED
--- a/libLSS/mpi/generic_mpi.hpp
+++ b/libLSS/mpi/generic_mpi.hpp
@ -0,0 +1,35 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/generic_mpi.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+// Backend selection: when ARES_MPI_FFTW is defined the wrappers built on the
+// real MPI library are included, otherwise the single-process fake ones.
+// In both cases LibLSS::MPI_IS_REAL records the choice as a compile-time
+// constant, defined once thanks to the __LIBLSS_MPI_REAL_DEFINED guard.
+#ifdef ARES_MPI_FFTW 
+// NOTE(review): these two macros are presumably meant to keep <mpi.h> from
+// pulling in the MPI C++ bindings -- confirm.
+#define OMPI_SKIP_MPICXX
+#define _MPICC_H
+#include <mpi.h>
+#include "real_mpi/mpi_type_translator.hpp"
+#include "real_mpi/mpi_communication.hpp"
+
+#ifndef __LIBLSS_MPI_REAL_DEFINED
+#define __LIBLSS_MPI_REAL_DEFINED
+namespace LibLSS {
+  static constexpr bool MPI_IS_REAL = true;
+}
+#endif
+
+#else
+#include "fake_mpi/mpi_type_translator.hpp"
+#include "fake_mpi/mpi_communication.hpp"
+
+#ifndef __LIBLSS_MPI_REAL_DEFINED
+#define __LIBLSS_MPI_REAL_DEFINED
+namespace LibLSS {
+  static constexpr bool MPI_IS_REAL = false;
+}
+#endif
+#endif
+
--- a/libLSS/mpi/real_mpi/mpi_communication.cpp
+++ b/libLSS/mpi/real_mpi/mpi_communication.cpp
@ -0,0 +1,16 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/real_mpi/mpi_communication.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#define OMPI_SKIP_MPICXX
+#define _MPICC_H
+#include <mpi.h>
+#include "mpi_type_translator.hpp"
+#include "mpi_communication.hpp"
+
+// Storage for the process-wide communicator pointer of the real MPI backend,
+// returned by MPI_Communication::instance(). It starts out null.
+LibLSS::MPI_Communication *LibLSS::MPI_Communication::singleton = 0;
--- a/libLSS/mpi/real_mpi/mpi_communication.hpp
+++ b/libLSS/mpi/real_mpi/mpi_communication.hpp
@ -0,0 +1,834 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/real_mpi/mpi_communication.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+
+#ifndef __LIBLSS_REAL_MPI_COMMUNICATION_HPP
+#define __LIBLSS_REAL_MPI_COMMUNICATION_HPP
+
+#include <boost/format.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <boost/multi_array.hpp>
+#include "libLSS/tools/openmp.hpp"
+
+namespace LibLSS {
+
+  /**
+   * @brief Wrapper class to handle MPI exceptions.
+   *
+   * The inheritance from std::exception is public so that generic
+   * `catch (const std::exception &)` handlers also receive MPI errors.
+   */
+  class MPI_Exception : public virtual std::exception {
+  public:
+    /**
+     * @brief Construct a new mpi exception object
+     * 
+     * The message is obtained from MPI_Error_string; if that call itself
+     * fails a generic message is used instead.
+     * 
+     * @param err  MPI Error code
+     */
+    MPI_Exception(int err) : errcode(err) {
+      char s[MPI_MAX_ERROR_STRING];
+      int l = 0;
+
+      // Only trust the buffer when MPI reports that it was filled.
+      if (MPI_Error_string(err, s, &l) == MPI_SUCCESS)
+        err_string.assign(s, s + l);
+      else
+        err_string = "Unknown MPI error";
+    }
+
+    /**
+     * @brief Return the error message
+     * 
+     * @return const char* 
+     */
+    const char *what() const throw() override { return err_string.c_str(); }
+
+    /**
+     * @brief Return the MPI error code given at construction
+     * 
+     * @return int 
+     */
+    int code() const { return errcode; }
+
+    virtual ~MPI_Exception() throw() {}
+
+  private:
+    std::string err_string;
+    int errcode;
+  };
+
+  class MPI_Communication;
+
+  /**
+   * Handle on a non-blocking MPI operation.
+   *
+   * A request is inactive until set() is called. All operations on an
+   * inactive request return immediately without calling MPI.
+   */
+  class MPICC_Request {
+  public:
+    // Underlying MPI handle; MPI_REQUEST_NULL until set() is called.
+    MPI_Request request;
+    // Rank of the peer (or root) recorded by the communicator when it creates
+    // the request; MPI_PROC_NULL when none was recorded.
+    int tofrom_rank;
+    // True once a handle has been attached with set().
+    bool active;
+
+    // All members get a defined value so that a default-constructed request
+    // can be copied and inspected safely.
+    MPICC_Request()
+        : request(MPI_REQUEST_NULL), tofrom_rank(MPI_PROC_NULL),
+          active(false) {}
+
+    /// Attach the MPI handle `r` and mark the request active.
+    void set(MPI_Request r) {
+      request = r;
+      active = true;
+    }
+
+    bool is_active() const { return active; }
+
+    /**
+     * Check for completion without blocking.
+     *
+     * @param status  optional status output
+     * @return true if the operation completed, or if the request is inactive
+     * @throws MPI_Exception if MPI_Test fails
+     */
+    bool test(MPI_Status *status = MPI_STATUS_IGNORE) {
+      int flag;
+      int err;
+
+      if (!active)
+        return true;
+
+      if ((err = MPI_Test(&request, &flag, status)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+      return flag != 0;
+    }
+
+    /**
+     * Release the MPI handle.
+     *
+     * @throws MPI_Exception if MPI_Request_free fails
+     */
+    void free() {
+      int err;
+
+      if (!active)
+        return;
+
+      if ((err = MPI_Request_free(&request)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+      // The handle is gone: mark the request inactive so that a second call
+      // to free() does not hand an already released handle back to MPI.
+      active = false;
+    }
+
+    /**
+     * Block until the operation completes.
+     *
+     * @param status  optional status output
+     * @throws MPI_Exception if MPI_Wait fails
+     */
+    void wait(MPI_Status *status = MPI_STATUS_IGNORE) {
+      int err;
+
+      if (!active)
+        return;
+
+      if ((err = MPI_Wait(&request, status)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+    }
+  };
+
+  // One-dimensional arrays of requests and of MPI statuses.
+  typedef boost::multi_array<MPICC_Request, 1> RequestArray;
+  typedef boost::multi_array<MPI_Status, 1> StatusArray;
+
+  /**
+   * Wrapper around an MPI one-sided communication window.
+   *
+   * The members are filled by MPI_Communication::win_create().
+   */
+  class MPICC_Window {
+  public:
+    // Communicator that created the window.
+    MPI_Communication *Comm;
+    // MPI window handle.
+    MPI_Win win;
+    // Local memory attached to the window, or null when this task exposes
+    // none.
+    void *wp;
+    int size;
+    // Rank targeted by lock()/unlock().
+    int rank;
+
+    /**
+     * Lock the window on the target rank.
+     *
+     * @param shared  request a shared lock instead of an exclusive one
+     * @throws MPI_Exception if MPI_Win_lock fails
+     */
+    void lock(bool shared = false) {
+      int err;
+      if ((err = MPI_Win_lock(
+               shared ? MPI_LOCK_SHARED : MPI_LOCK_EXCLUSIVE, rank, 0, win)) !=
+          MPI_SUCCESS)
+        throw MPI_Exception(err);
+    }
+
+    /**
+     * Unlock the window on the target rank.
+     *
+     * @throws MPI_Exception if MPI_Win_unlock fails
+     */
+    void unlock() {
+      int err;
+
+      if ((err = MPI_Win_unlock(rank, win)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+    }
+
+    /**
+     * Synchronize all tasks on the window.
+     *
+     * @throws MPI_Exception if MPI_Win_fence fails
+     */
+    void fence() {
+      int err;
+
+      // The first argument of MPI_Win_fence is an assertion bit mask, not a
+      // rank; 0 means that no assertion is made.
+      if ((err = MPI_Win_fence(0, win)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+    }
+
+    /// Free the window, then the attached memory if there is any.
+    void destroy() {
+      MPI_Win_free(&win);
+      if (wp != 0)
+        MPI_Free_mem(wp);
+    }
+
+    // put/get are only declared here; their definitions are not part of
+    // this class body.
+    template <typename T>
+    void put(int r, T v);
+
+    template <typename T>
+    T get(int r);
+
+    /// Local window memory viewed as an array of T.
+    template <typename T>
+    T *get_ptr() {
+      return (T *)wp;
+    }
+
+    /// Local window memory viewed as a read-only array of T.
+    template <typename T>
+    const T *get_ptr() const {
+      return (const T *)wp;
+    }
+  };
+
+  /**
+   * Mutex shared between the tasks of a communicator.
+   *
+   * Only the interface is declared here; the member functions are defined
+   * elsewhere.
+   */
+  class MPICC_Mutex {
+  public:
+    /// @param comm  communicator whose tasks share the mutex
+    /// @param tag   tag associated with the mutex
+    MPICC_Mutex(MPI_Comm comm, int tag);
+    ~MPICC_Mutex();
+
+    void acquire();
+    void release();
+
+  protected:
+    MPI_Comm comm;
+    MPI_Win win;
+    int *lockArray;
+    int host_rank;
+    int mutex_tag;
+  };
+
+  /**
+   * @brief Wrapper for MPI communication object.
+   * 
+   */
+  class MPI_Communication {
+  private:
+    // Wrapped MPI communicator handle.
+    MPI_Comm comm0;
+    // Rank and size of comm0, queried once in the constructor.
+    int cur_rank, cur_size;
+    // When true the destructor calls MPI_Comm_free on comm0.
+    bool free_on_destroy;
+
+    // The setup functions are friends so that they can access `singleton`.
+    friend MPI_Communication *setupMPI();
+    friend MPI_Communication *setupMPI(int &argc, char **&argv);
+    friend MPI_Communication *setupMPI(MPI_Comm w);
+
+    // Process-wide instance returned by instance().
+    static MPI_Communication *singleton;
+
+  public:
+    // Request type returned by the non-blocking operations of this class.
+    typedef MPICC_Request Request;
+
+    /**
+     * @brief Returns the world communicator.
+     * 
+     * This is the process-wide `singleton` pointer.
+     * NOTE(review): presumably null until setupMPI() has run -- confirm.
+     * 
+     * @return MPI_Communication* 
+     */
+    static MPI_Communication *instance() { return singleton; }
+
+    /**
+     * @brief Construct a new mpi communication object based on a MPI_Comm instance.
+     * 
+     * The rank and size of the communicator are queried once here and cached.
+     * 
+     * @param mcomm        MPI_Comm instance
+     * @param auto_free    if true, MPI_Comm_free is called on `mcomm` when
+     *                     this object is destroyed
+     */
+    MPI_Communication(MPI_Comm mcomm, bool auto_free = false)
+        : comm0(mcomm), free_on_destroy(auto_free) {
+      //      MPI_Comm_set_errhandler(comm, MPI_ERRORS_RETURN);
+      MPI_Comm_rank(comm0, &cur_rank);
+      MPI_Comm_size(comm0, &cur_size);
+    }
+
+    /// Frees the wrapped communicator, but only when the object was built
+    /// with `auto_free` set.
+    ~MPI_Communication() {
+      if (free_on_destroy)
+        MPI_Comm_free(&comm0);
+    }
+
+    /**
+     * @brief Split the communicator with MPI_Comm_split.
+     * 
+     * @param color   color passed to MPI_Comm_split
+     * @param key     key passed to MPI_Comm_split
+     * @return a newly allocated wrapper that frees the new communicator on
+     *         destruction, or a null pointer when MPI hands back
+     *         MPI_COMM_NULL
+     * @throws MPI_Exception if MPI_Comm_split fails
+     */
+    MPI_Communication *split(int color = MPI_UNDEFINED, int key = 0) {
+      MPI_Comm sub_comm;
+      const int status = MPI_Comm_split(comm0, color, key, &sub_comm);
+
+      if (status != MPI_SUCCESS)
+        throw MPI_Exception(status);
+
+      if (sub_comm != MPI_COMM_NULL)
+        return new MPI_Communication(sub_comm, true);
+      return nullptr;
+    }
+
+    /// Allocate a mutex on this communicator with the given tag. The caller
+    /// receives a pointer obtained from `new`.
+    MPICC_Mutex *new_mutex(int tag) { return new MPICC_Mutex(comm0, tag); }
+
+    /**
+     * @brief Returns the rank of the node in the communicator
+     * 
+     * The value was cached by the constructor; no MPI call is made.
+     * 
+     * @return int 
+     */
+    int rank() const { return cur_rank; }
+
+    /**
+     * @brief Returns the size of the communicator
+     * 
+     * The value was cached by the constructor; no MPI call is made.
+     * 
+     * @return int 
+     */
+    int size() const { return cur_size; }
+
+    /**
+     * @brief Returns the underlying MPI_Comm instance
+     * 
+     * @return MPI_Comm 
+     */
+    MPI_Comm comm() { return comm0; }
+
+    /**
+     * @brief Triggers an abort action on the communication.
+     * 
+     * That action is generally fatal to the program. MPI_Abort is called on
+     * the wrapped communicator with error code 99.
+     * 
+     */
+    void abort() { MPI_Abort(comm0, 99); }
+
+    /**
+     * @brief Create a one-sided communication window hosted by rank 0.
+     * 
+     * Rank 0 allocates `size` bytes with MPI_Alloc_mem and exposes them; all
+     * other ranks expose an empty window. A fence is issued on the new
+     * window before returning.
+     * 
+     * @param size        number of bytes exposed by the hosting rank
+     * @param disp_unit   displacement unit used on the hosting rank
+     * @return MPICC_Window  the window; its `size` member holds the number
+     *                       of bytes exposed by the calling task
+     * @throws MPI_Exception if the allocation or the window creation fails
+     */
+    MPICC_Window win_create(int size, int disp_unit) {
+      MPICC_Window w;
+      int err;
+
+      w.rank = 0;
+      w.Comm = this;
+
+      if (rank() == w.rank) {
+        if ((err = MPI_Alloc_mem(size, MPI_INFO_NULL, &w.wp)) != MPI_SUCCESS)
+          throw MPI_Exception(err);
+      } else {
+        // Non-hosting ranks take part in the collective creation with an
+        // empty local region.
+        size = 0;
+        disp_unit = 1;
+        w.wp = 0;
+      }
+      // Record what this task exposes; the member was left unset before.
+      w.size = size;
+      if ((err = MPI_Win_create(
+               w.wp, size, disp_unit, MPI_INFO_NULL, comm0, &w.win)) !=
+          MPI_SUCCESS) {
+        if (w.wp != 0)
+          MPI_Free_mem(w.wp);
+        throw MPI_Exception(err);
+      }
+      MPI_Win_fence(0, w.win);
+      return w;
+    }
+
+    /**
+     * @brief Combined send and receive, forwarded to MPI_Sendrecv.
+     * 
+     * @param sendbuf     buffer holding the objects to be sent
+     * @param sendcount   number of objects to send
+     * @param sdatatype   datatype of the sent objects
+     * @param dest        rank of the destination
+     * @param sendtag     tag attached to the send
+     * @param recvbuf     buffer receiving the incoming objects
+     * @param recvcount   number of objects that can be received
+     * @param rdatatype   datatype of the received objects
+     * @param source      rank of the source
+     * @param recvtag     tag expected on the receive
+     * @param s           optional status output
+     * @throws MPI_Exception if MPI_Sendrecv fails
+     */
+    void send_recv(
+        const void *sendbuf, int sendcount, MPI_Datatype sdatatype, int dest,
+        int sendtag, void *recvbuf, int recvcount, MPI_Datatype rdatatype,
+        int source, int recvtag, MPI_Status *s = MPI_STATUS_IGNORE) {
+      const int status = MPI_Sendrecv(
+          (void *)sendbuf, sendcount, sdatatype, dest, sendtag, recvbuf,
+          recvcount, rdatatype, source, recvtag, comm0, s);
+
+      if (status != MPI_SUCCESS)
+        throw MPI_Exception(status);
+    }
+
+    /**
+     * @brief Send a buffer to another MPI task
+     * 
+     * @param buf         buffer holding the objects to be sent
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param dest        rank of the destination
+     * @param tag         tag attached to the send
+     * @throws MPI_Exception if MPI_Send fails
+     */
+    void
+    send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag) {
+      int err;
+
+      // The const is cast away because the buffer is passed to MPI_Send as a
+      // plain `void *`.
+      if ((err = MPI_Send((void *)buf, count, datatype, dest, tag, comm0)) !=
+          MPI_SUCCESS) {
+        throw MPI_Exception(err);
+      }
+    }
+
+    /**
+     * @brief Start a non-blocking receive from another MPI task
+     * 
+     * The call returns as soon as the receive is posted. The content of the
+     * buffer is not guaranteed until a successful wait on the returned
+     * object.
+     * 
+     * @param buf         buffer receiving the incoming objects
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param from        rank of the source
+     * @param tag         tag attached to the message
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Irecv fails
+     * @see LibLSS::MPI_Communication::recv
+     */
+    Request
+    Irecv(void *buf, int count, MPI_Datatype datatype, int from, int tag) {
+      MPI_Request handle;
+      const int status =
+          MPI_Irecv(buf, count, datatype, from, tag, comm0, &handle);
+
+      if (status != MPI_SUCCESS)
+        throw MPI_Exception(status);
+
+      Request req;
+      req.tofrom_rank = from;
+      req.set(handle);
+      return req;
+    }
+
+    /**
+     * @brief Post a non-blocking send to another MPI task
+     *
+     * The send is only started here (MPI_Isend); the returned request has
+     * to be waited on before the buffer may be reused.
+     *
+     * @param buf         buffer holding the objects to be sent
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param to          rank of the destination
+     * @param tag         tag attached to the message
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Isend does not return MPI_SUCCESS
+     */
+    Request
+    Isend(void *buf, int count, MPI_Datatype datatype, int to, int tag) {
+      Request pending;
+      MPI_Request handle;
+
+      pending.tofrom_rank = to;
+
+      int const rc = MPI_Isend(buf, count, datatype, to, tag, comm0, &handle);
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+
+      pending.set(handle);
+      return pending;
+    }
+
+    /**
+     * @brief Start a non-blocking all-reduce (MPI_Iallreduce)
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param recvbuf     buffer receiving the reduced result
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param op          reduction operation
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Iallreduce does not return MPI_SUCCESS
+     */
+    Request IallReduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op) {
+      Request pending;
+      MPI_Request handle;
+
+      int const rc = MPI_Iallreduce(
+          sendbuf, recvbuf, count, datatype, op, comm0, &handle);
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+
+      pending.set(handle);
+      return pending;
+    }
+
+    /**
+     * @brief Start a non-blocking reduction towards one task (MPI_Ireduce)
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param recvbuf     buffer receiving the reduced result
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param op          reduction operation
+     * @param root        rank receiving the result
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Ireduce does not return MPI_SUCCESS
+     */
+    Request Ireduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root) {
+      Request pending;
+      MPI_Request handle;
+
+      pending.tofrom_rank = root;
+
+      int const rc = MPI_Ireduce(
+          sendbuf, recvbuf, count, datatype, op, root, comm0, &handle);
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+
+      pending.set(handle);
+      return pending;
+    }
+
+    /**
+     * @brief Typed front-end of IallReduce
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    Request IallReduceT(const void *sendbuf, T *recvbuf, int count, MPI_Op op) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return IallReduce(sendbuf, recvbuf, count, mpi_type, op);
+    }
+
+    /**
+     * @brief Typed front-end of Ireduce
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    Request
+    IreduceT(const void *sendbuf, T *recvbuf, int count, MPI_Op op, int root) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return Ireduce(sendbuf, recvbuf, count, mpi_type, op, root);
+    }
+
+    /**
+     * @brief Start a non-blocking broadcast (MPI_Ibcast)
+     *
+     * @param buffer      buffer sent by root and filled on the other tasks
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param root        rank of the broadcasting task
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Ibcast does not return MPI_SUCCESS
+     */
+    Request
+    Ibroadcast(void *buffer, int count, MPI_Datatype datatype, int root) {
+      Request pending;
+      MPI_Request handle;
+
+      pending.tofrom_rank = root;
+
+      int const rc = MPI_Ibcast(buffer, count, datatype, root, comm0, &handle);
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+
+      pending.set(handle);
+      return pending;
+    }
+
+    /**
+     * @brief Typed front-end of Ibroadcast
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    Request IbroadcastT(T *buf, int count, int root) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return Ibroadcast(buf, count, mpi_type, root);
+    }
+
+    /**
+     * @brief Typed front-end of Irecv
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    Request IrecvT(T *buf, int count, int from, int tag) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return Irecv(buf, count, mpi_type, from, tag);
+    }
+
+    /**
+     * @brief Typed front-end of Isend
+     *
+     * The MPI datatype is obtained from translateMPIType<T>(). Despite its
+     * name, `from` is forwarded to Isend as the destination rank.
+     */
+    template <typename T>
+    Request IsendT(T *buf, int count, int from, int tag) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return Isend(buf, count, mpi_type, from, tag);
+    }
+
+    /**
+     * @brief Wait for the completion of all the requests of a vector
+     *
+     * The raw MPI_Request handles are copied into a temporary contiguous
+     * array which is handed to MPI_Waitall.
+     *
+     * @param reqs        requests to wait on (may be empty)
+     * @param statuses    resized to reqs.size() and filled with the status
+     *                    of each request; a temporary is used by default
+     */
+    static void WaitAll(
+        std::vector<Request> &reqs,
+        std::vector<MPI_Status> &&statuses = std::vector<MPI_Status>()) {
+      size_t const num_reqs = reqs.size();
+      boost::multi_array<MPI_Request, 1> req_array(boost::extents[num_reqs]);
+
+      statuses.resize(num_reqs);
+      for (size_t i = 0; i < num_reqs; i++)
+        req_array[i] = reqs[i].request;
+
+      // data() is well defined on an empty vector, unlike &statuses[0].
+      MPI_Waitall(int(num_reqs), req_array.data(), statuses.data());
+    }
+
+    /**
+     * @brief Wait for the completion of all the active requests of an array
+     *
+     * Inactive requests (is_active() false) are skipped. The active ones are
+     * packed into a temporary array for MPI_Waitall, which writes their
+     * statuses at the start of `statuses`. The statuses are then moved back
+     * to the slot of the request they belong to.
+     *
+     * @param reqs        requests to wait on
+     * @param statuses    receives the statuses; indexed like `reqs` for the
+     *                    active requests. Slots of inactive requests may
+     *                    hold leftovers of the packed layout.
+     */
+    static void WaitAll(RequestArray &reqs, StatusArray &statuses) {
+      long const total = reqs.num_elements();
+      boost::multi_array<MPI_Request, 1> req_array(boost::extents[total]);
+      boost::multi_array<long, 1> req_assign(boost::extents[total]);
+      long j = 0;
+
+      for (long i = 0; i < total; i++) {
+        if (!reqs[i].is_active())
+          continue;
+
+        req_array[j] = reqs[i].request;
+        req_assign[j] = i;
+        j++;
+      }
+
+      MPI_Waitall(j, req_array.data(), statuses.data());
+
+      // req_assign is strictly increasing and req_assign[i] >= i, so the
+      // statuses must be moved starting from the last one: a forward sweep
+      // would overwrite packed entries that have not been moved yet.
+      for (long i = j - 1; i >= 0; i--) {
+        if (req_assign[i] != i)
+          statuses[req_assign[i]] = statuses[i];
+      }
+    }
+
+    /**
+     * @brief Receive a buffer from another MPI task (blocking)
+     *
+     * @param buf         buffer that will hold the received objects
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param from        rank of the sender
+     * @param tag         tag attached to the message
+     * @param status      filled with the status of the receive; left
+     *                    untouched by MPI when MPI_STATUS_IGNORE (default)
+     * @throws MPI_Exception if MPI_Recv does not return MPI_SUCCESS
+     */
+    void recv(
+        void *buf, int count, MPI_Datatype datatype, int from, int tag,
+        MPI_Status *status = MPI_STATUS_IGNORE) {
+      // Hand the caller's status straight to MPI so that it is filled.
+      int const rc = MPI_Recv(buf, count, datatype, from, tag, comm0, status);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Reduce a buffer across all tasks onto `root` (MPI_Reduce)
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param recvbuf     buffer receiving the reduced result
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param op          reduction operation
+     * @param root        rank receiving the result
+     * @throws MPI_Exception if MPI_Reduce does not return MPI_SUCCESS
+     */
+    void reduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root) {
+      int const rc = MPI_Reduce(
+          const_cast<void *>(sendbuf), recvbuf, count, datatype, op, root,
+          comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Broadcast a buffer from `root` to all tasks (MPI_Bcast)
+     *
+     * @param sendrecbuf     buffer sent by root and filled elsewhere
+     * @param sendrec_count  number count of objects
+     * @param sr_type        datatypes of the objects
+     * @param root           rank of the broadcasting task
+     * @throws MPI_Exception if MPI_Bcast does not return MPI_SUCCESS
+     */
+    void broadcast(
+        void *sendrecbuf, int sendrec_count, MPI_Datatype sr_type, int root) {
+      int const rc = MPI_Bcast(sendrecbuf, sendrec_count, sr_type, root, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Scatter a buffer from `root` over all tasks (MPI_Scatter)
+     *
+     * All arguments are forwarded unchanged to MPI_Scatter.
+     *
+     * @throws MPI_Exception if MPI_Scatter does not return MPI_SUCCESS
+     */
+    void scatter(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype, int root) {
+      int const rc = MPI_Scatter(
+          const_cast<void *>(sendbuf), sendcount, sendtype, recvbuf, recvcount,
+          recvtype, root, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Reduce a buffer across all tasks, result on every task
+     *
+     * Thin wrapper around MPI_Allreduce.
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param recvbuf     buffer receiving the reduced result
+     * @param count       number count of objects
+     * @param datatype    datatypes of the objects
+     * @param op          reduction operation
+     * @throws MPI_Exception if MPI_Allreduce does not return MPI_SUCCESS
+     */
+    void all_reduce(
+        const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op) {
+      int const rc = MPI_Allreduce(
+          const_cast<void *>(sendbuf), recvbuf, count, datatype, op, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Gather a buffer from every task onto every task
+     *
+     * All arguments are forwarded unchanged to MPI_Allgather.
+     *
+     * @throws MPI_Exception if MPI_Allgather does not return MPI_SUCCESS
+     */
+    void all_gather(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype) {
+      int const rc = MPI_Allgather(
+          const_cast<void *>(sendbuf), sendcount, sendtype, recvbuf, recvcount,
+          recvtype, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Gather a buffer from every task onto `root`
+     *
+     * All arguments are forwarded unchanged to MPI_Gather.
+     *
+     * @throws MPI_Exception if MPI_Gather does not return MPI_SUCCESS
+     */
+    void gather(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype, int root) {
+      int const rc = MPI_Gather(
+          const_cast<void *>(sendbuf), sendcount, sendtype, recvbuf, recvcount,
+          recvtype, root, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Typed front-end of reduce
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    void
+    reduce_t(const void *sendbuf, T *recvbuf, int count, MPI_Op op, int root) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      reduce(sendbuf, recvbuf, count, mpi_type, op, root);
+    }
+
+    /**
+     * @brief Typed front-end of broadcast
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    void broadcast_t(T *sendrecbuf, int count, int root) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      broadcast(sendrecbuf, count, mpi_type, root);
+    }
+
+    /**
+     * @brief Typed front-end of all_reduce
+     *
+     * The MPI datatype is obtained from translateMPIType<T>().
+     */
+    template <typename T>
+    void all_reduce_t(const void *sendbuf, T *recvbuf, int count, MPI_Op op) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all_reduce(sendbuf, recvbuf, count, mpi_type, op);
+    }
+
+    /**
+     * @brief Typed front-end of all_gather
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     */
+    template <typename T>
+    void
+    all_gather_t(const T *sendbuf, int sendcount, T *recvbuf, int recvcount) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all_gather(sendbuf, sendcount, mpi_type, recvbuf, recvcount, mpi_type);
+    }
+
+    /**
+     * @brief Typed front-end of gather
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param sendcount   number of objects sent by each task
+     * @param recvbuf     buffer receiving the gathered objects
+     * @param recvcount   number of objects received from each task
+     * @param root        rank receiving the gathered objects
+     */
+    template <typename T>
+    void gather_t(
+        const T *sendbuf, int sendcount, T *recvbuf, int recvcount, int root) {
+      // `root` has to be forwarded: gather() takes it as its last argument.
+      gather(
+          sendbuf, sendcount, translateMPIType<T>(), recvbuf, recvcount,
+          translateMPIType<T>(), root);
+    }
+
+    /**
+     * @brief Start a non-blocking gather onto `root` (MPI_Igather)
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param sendcount   number of objects sent by each task
+     * @param sendtype    datatype of the sent objects
+     * @param buf         buffer receiving the gathered objects
+     * @param recvcount   number of objects received from each task
+     * @param recvtype    datatype of the received objects
+     * @param root        rank receiving the gathered objects
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Igather does not return MPI_SUCCESS
+     */
+    Request Igather(
+        void const *sendbuf, int sendcount, MPI_Datatype sendtype, void *buf,
+        int recvcount, MPI_Datatype recvtype, int root) {
+      Request pending;
+      MPI_Request handle;
+
+      pending.tofrom_rank = root;
+
+      int const rc = MPI_Igather(
+          sendbuf, sendcount, sendtype, buf, recvcount, recvtype, root, comm0,
+          &handle);
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+
+      pending.set(handle);
+      return pending;
+    }
+
+    /**
+     * @brief Typed front-end of Igather
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     */
+    template <typename T>
+    Request
+    IgatherT(T const *sendbuf, int sendcount, T *buf, int recvcount, int root) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      return Igather(
+          sendbuf, sendcount, mpi_type, buf, recvcount, mpi_type, root);
+    }
+
+    /**
+     * @brief Synchronise all the tasks of the communicator (MPI_Barrier)
+     *
+     * @throws MPI_Exception if MPI_Barrier does not return MPI_SUCCESS
+     */
+    void barrier() {
+      int const rc = MPI_Barrier(comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Accumulate arrays of all the tasks onto `root`
+     *
+     * Every task but `root` sends `source_array` to `root` (tag 0). `root`
+     * receives them one rank after the other and adds each of them, element
+     * by element, to `target_array`. `source_array` is not read on `root`
+     * and `target_array` is not touched on the other tasks.
+     *
+     * @param target_array   array accumulated into (root only)
+     * @param source_array   array sent to root (other tasks only)
+     * @param count          number of elements of the arrays
+     * @param root           rank doing the accumulation
+     * @throws MPI_Exception forwarded from send / recv
+     */
+    template <typename T>
+    void accum(T *target_array, const T *source_array, int count, int root) {
+      MPI_Datatype t = translateMPIType<T>();
+
+      if (rank() != root) {
+        send(source_array, count, t, root, 0);
+        return;
+      }
+
+      // The scratch buffer is owned by a container so that it is released
+      // even when recv() throws.
+      boost::multi_array<T, 1> tmp_arr(boost::extents[count]);
+
+      for (int other = 0; other < size(); other++) {
+        if (other == root)
+          continue;
+        recv(tmp_arr.data(), count, t, other, 0);
+        for (int j = 0; j < count; j++)
+          target_array[j] += tmp_arr[j];
+      }
+    }
+
+    /**
+     * @brief Exchange equally sized blocks between all tasks
+     *
+     * All arguments are forwarded unchanged to MPI_Alltoall.
+     *
+     * @throws MPI_Exception if MPI_Alltoall does not return MPI_SUCCESS
+     */
+    void all2all(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype) {
+      int const rc = MPI_Alltoall(
+          const_cast<void *>(sendbuf), sendcount, sendtype, recvbuf, recvcount,
+          recvtype, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Typed front-end of all2all
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     */
+    template <typename T>
+    void all2allT(const T *sendbuf, int sendcount, T *recvbuf, int recvcount) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all2all(sendbuf, sendcount, mpi_type, recvbuf, recvcount, mpi_type);
+    }
+
+    /**
+     * @brief Accumulate an array over all tasks, result on every task
+     *
+     * The array is first accumulated onto rank 0 with accum(). Rank 0 then
+     * sends the result to each other rank in turn (tag 0), which receives it
+     * in place.
+     *
+     * @param ts_array    local contribution on input, accumulated array on
+     *                    output
+     * @param count       number of elements of the array
+     */
+    template <typename T>
+    void all_accum(T *ts_array, int count) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+      int const master = 0;
+
+      accum(ts_array, ts_array, count, master);
+
+      if (rank() != master) {
+        recv(ts_array, count, mpi_type, master, 0);
+        return;
+      }
+
+      for (int other = 1; other < size(); other++)
+        send(ts_array, count, mpi_type, other, 0);
+    }
+
+    /**
+     * @brief Gather blocks of varying size from every task onto every task
+     *
+     * Forwards to MPI_Allgatherv.
+     *
+     * @param sendbuf     buffer holding the local contribution
+     * @param sendcount   number of objects sent by this task
+     * @param sendtype    datatype of the sent objects
+     * @param recvbuf     buffer receiving the gathered objects
+     * @param recvcounts  number of objects received from each task
+     * @param displs      displacement in recvbuf of the block of each task
+     * @param recvtype    datatype of the received objects
+     * @throws MPI_Exception if MPI_Allgatherv does not return MPI_SUCCESS
+     */
+    void all_gatherv(
+        const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[],
+        MPI_Datatype recvtype) {
+      // Constness is cast away to circumvent old buggy MPI implementations
+      // whose prototypes take non-const pointers.
+      int const rc = MPI_Allgatherv(
+          const_cast<void *>(sendbuf), sendcount, sendtype, recvbuf,
+          const_cast<int *>(recvcounts), const_cast<int *>(displs), recvtype,
+          comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Typed front-end of all_gatherv
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     */
+    template <typename T>
+    void all_gatherv_t(
+        const T *sendbuf, int sendcount, T *recvbuf, const int *recvcounts,
+        const int *displs) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all_gatherv(
+          sendbuf, sendcount, mpi_type, recvbuf, recvcounts, displs, mpi_type);
+    }
+
+    /**
+     * @brief In-place variant of all_gather_t
+     *
+     * The send side is MPI_IN_PLACE (count 0, MPI_DATATYPE_NULL), so only
+     * the receive buffer is needed. A dedicated overload exists because
+     * automatic type translation has problems with in-place gathering.
+     *
+     * @param recvbuf     buffer holding the local block on input and the
+     *                    gathered blocks on output
+     * @param recvcount   number of objects received from each task
+     */
+    template <typename T>
+    void all_gather_t(T *recvbuf, int recvcount) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all_gather(
+          MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, recvcount, mpi_type);
+    }
+
+    /**
+     * @brief In-place variant of all_gatherv_t
+     *
+     * The send side is MPI_IN_PLACE (count 0, MPI_DATATYPE_NULL), so only
+     * the receive buffer is needed.
+     *
+     * @param recvbuf     buffer holding the local block on input and the
+     *                    gathered blocks on output
+     * @param recvcounts  number of objects received from each task
+     * @param displs      displacement in recvbuf of the block of each task
+     */
+    template <typename T>
+    void all_gatherv_t(T *recvbuf, const int *recvcounts, const int *displs) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all_gatherv(
+          MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, recvcounts, displs,
+          mpi_type);
+    }
+
+    /**
+     * @brief Exchange blocks of varying size between all tasks
+     *
+     * All arguments are forwarded unchanged to MPI_Alltoallv.
+     *
+     * @throws MPI_Exception if MPI_Alltoallv does not return MPI_SUCCESS
+     */
+    void all2allv(
+        const void *sendbuf, const int *sendcounts, const int *sdispls,
+        MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
+        const int *rdispls, MPI_Datatype recvtype) {
+      int const rc = MPI_Alltoallv(
+          sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls,
+          recvtype, comm0);
+
+      if (rc != MPI_SUCCESS)
+        throw MPI_Exception(rc);
+    }
+
+    /**
+     * @brief Typed front-end of all2allv
+     *
+     * The same MPI datatype, translateMPIType<T>(), is used for the send
+     * and the receive side.
+     */
+    template <typename T>
+    void all2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls, T *recvbuf,
+        const int *recvcounts, const int *rdispls) {
+      MPI_Datatype const mpi_type = translateMPIType<T>();
+
+      all2allv(
+          sendbuf, sendcounts, sdispls, mpi_type, recvbuf, recvcounts, rdispls,
+          mpi_type);
+    }
+
+    /**
+     * @brief Start a non-blocking exchange of blocks of varying size
+     *
+     * Forwards to MPI_Ialltoallv with explicitly provided datatypes.
+     *
+     * @param sendbuf     buffer holding the blocks to be sent
+     * @param sendcounts  number of objects sent to each task
+     * @param sdispls     displacement in sendbuf of the block of each task
+     * @param sendtype    datatype of the sent objects
+     * @param recvbuf     buffer receiving the blocks
+     * @param recvcounts  number of objects received from each task
+     * @param rdispls     displacement in recvbuf of the block of each task
+     * @param recvtype    datatype of the received objects
+     * @return Request   the pending request
+     * @throws MPI_Exception if MPI_Ialltoallv does not return MPI_SUCCESS
+     */
+    template <typename T>
+    Request Iall2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls,
+        MPI_Datatype sendtype, T *recvbuf, const int *recvcounts,
+        const int *rdispls, MPI_Datatype recvtype) {
+      int err;
+      Request req;
+      MPI_Request r;
+
+      // The MPI-3 entry point is spelled MPI_Ialltoallv (lower case 'a').
+      if ((err = MPI_Ialltoallv(
+               sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts,
+               rdispls, recvtype, comm0, &r)) != MPI_SUCCESS)
+        throw MPI_Exception(err);
+
+      req.set(r);
+      return req;
+    }
+
+    /**
+     * @brief Start a non-blocking exchange of blocks of varying size
+     *
+     * Convenience overload: the MPI datatype of both sides is obtained from
+     * translateMPIType<T>() and the call is delegated to the overload of
+     * Iall2allv_t taking explicit datatypes.
+     *
+     * @param sendbuf     buffer holding the blocks to be sent
+     * @param sendcounts  number of objects sent to each task
+     * @param sdispls     displacement in sendbuf of the block of each task
+     * @param recvbuf     buffer receiving the blocks
+     * @param recvcounts  number of objects received from each task
+     * @param rdispls     displacement in recvbuf of the block of each task
+     * @return Request   the pending request
+     */
+    template <typename T>
+    Request Iall2allv_t(
+        const T *sendbuf, const int *sendcounts, const int *sdispls, T *recvbuf,
+        const int *recvcounts, const int *rdispls) {
+      return Iall2allv_t(
+          sendbuf, sendcounts, sdispls, translateMPIType<T>(), recvbuf,
+          recvcounts, rdispls, translateMPIType<T>());
+    }
+  };
+
+  /**
+   * @brief Write one value into the window
+   *
+   * The value is transferred with MPI_Put, at displacement `r`, between a
+   * lock() and an unlock() of the window. The error code of MPI_Put is only
+   * examined once the window has been unlocked.
+   *
+   * @param r   displacement of the element in the window
+   * @param v   value to write
+   * @throws MPI_Exception if MPI_Put does not return MPI_SUCCESS
+   */
+  template <typename T>
+  void MPICC_Window::put(int r, T v) {
+    MPI_Datatype const mpi_type = translateMPIType<T>();
+
+    lock();
+    int const rc = MPI_Put(&v, 1, mpi_type, rank, r, 1, mpi_type, win);
+    unlock();
+
+    if (rc != MPI_SUCCESS)
+      throw MPI_Exception(rc);
+  }
+
+  /**
+   * @brief Read one value from the window
+   *
+   * The value is fetched with MPI_Get, at displacement `r`, between a
+   * lock() and an unlock() of the window. The error code of MPI_Get is only
+   * examined once the window has been unlocked.
+   *
+   * @param r   displacement of the element in the window
+   * @return the fetched value (the local copy is set to 0 before the call)
+   * @throws MPI_Exception if MPI_Get does not return MPI_SUCCESS
+   */
+  template <typename T>
+  T MPICC_Window::get(int r) {
+    MPI_Datatype const mpi_type = translateMPIType<T>();
+    T v;
+
+    v = 0;
+
+    lock();
+    int const rc = MPI_Get(&v, 1, mpi_type, rank, r, 1, mpi_type, win);
+    unlock();
+
+    if (rc != MPI_SUCCESS)
+      throw MPI_Exception(rc);
+
+    return v;
+  }
+
+  /**
+   * @brief Initialise MPI without command line arguments
+   *
+   * When compiled with OpenMP, MPI is initialised with
+   * MPI_THREAD_FUNNELED and the program is aborted (code 99) if that level
+   * is not provided. Otherwise plain MPI_Init is used. A communication
+   * object wrapping MPI_COMM_WORLD is then allocated and registered as
+   * MPI_Communication::singleton.
+   *
+   * @return the newly allocated communication object
+   */
+  inline MPI_Communication *setupMPI() {
+#ifdef _OPENMP
+    int provided;
+
+    std::cout << "setupMPI with threads (Nthreads=" << smp_get_max_threads()
+              << ")" << std::endl;
+    ::MPI_Init_thread(0, 0, MPI_THREAD_FUNNELED, &provided);
+    if (!(provided >= MPI_THREAD_FUNNELED)) {
+      std::cerr << "Cannot mix MPI and Threads here. Please recompile with "
+                   "OpenMP or MPI switched off."
+                << std::endl;
+      ::MPI_Abort(MPI_COMM_WORLD, 99);
+    }
+#else
+    std::cout << "setupMPI with *NO* threads" << std::endl;
+    ::MPI_Init(0, 0);
+#endif
+    MPI_Communication *const world = new MPI_Communication(MPI_COMM_WORLD);
+
+    MPI_Communication::singleton = world;
+    return world;
+  }
+
+  /**
+   * @brief Initialise MPI with the command line arguments
+   *
+   * When compiled with OpenMP, MPI is initialised with
+   * MPI_THREAD_FUNNELED and the program is aborted (code 99) if that level
+   * is not provided. Otherwise plain MPI_Init is used. A communication
+   * object wrapping MPI_COMM_WORLD is then allocated and registered as
+   * MPI_Communication::singleton.
+   *
+   * @param argc   argument count, handed to the MPI initialisation
+   * @param argv   argument vector, handed to the MPI initialisation
+   * @return the newly allocated communication object
+   */
+  inline MPI_Communication *setupMPI(int &argc, char **&argv) {
+#ifdef _OPENMP
+    int provided;
+
+    std::cout << "setupMPI with threads (Nthreads=" << smp_get_max_threads()
+              << ")" << std::endl;
+    ::MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
+    if (!(provided >= MPI_THREAD_FUNNELED)) {
+      std::cerr << "Cannot mix MPI and Threads here. Please recompile with "
+                   "OpenMP or MPI switched off."
+                << std::endl;
+      ::MPI_Abort(MPI_COMM_WORLD, 99);
+    }
+#else
+    std::cout << "setupMPI with *NO* threads" << std::endl;
+    ::MPI_Init(&argc, &argv);
+#endif
+    MPI_Communication *const world = new MPI_Communication(MPI_COMM_WORLD);
+
+    MPI_Communication::singleton = world;
+    return world;
+  }
+
+  /**
+   * @brief Manual setup on top of an already initialised MPI
+   *
+   * No MPI initialisation and no safety check is done here: the caller is
+   * responsible for having initialised MPI. A communication object wrapping
+   * `existing` is allocated and registered as
+   * MPI_Communication::singleton.
+   *
+   * @param existing   communicator to wrap
+   * @return the newly allocated communication object
+   */
+  inline MPI_Communication *setupMPI(MPI_Comm existing) {
+    // Wrap the communicator handed by the caller, not MPI_COMM_WORLD.
+    MPI_Communication *w = new MPI_Communication(existing);
+    MPI_Communication::singleton = w;
+    return w;
+  }
+
+  /**
+   * @brief Shut MPI down by calling MPI_Finalize
+   */
+  inline void doneMPI() {
+    ::MPI_Finalize();
+  }
+
+}; // namespace LibLSS
+
+#endif
--- a/libLSS/mpi/real_mpi/mpi_mutex.cpp
+++ b/libLSS/mpi/real_mpi/mpi_mutex.cpp
@ -0,0 +1,203 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/real_mpi/mpi_mutex.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <cassert>
+#include <iostream>
+#include <vector>
+#include <mpi.h>
+#include "mpi_type_translator.hpp"
+#include "mpi_communication.hpp"
+
+using namespace CMB;
+using namespace std;
+
+// Build a mutex shared by the tasks of communicator `c`.
+//
+// The lock table (one int per task, all zero) is allocated with
+// MPI_Alloc_mem on host_rank (rank 0) only; the other tasks expose an empty
+// window. `mutex_tag` is stored for the messages exchanged by acquire() and
+// release(). Throws MPI_Exception if the allocation or the creation of the
+// window fails.
+MPICC_Mutex::MPICC_Mutex(MPI_Comm c, int mutex_tag)
+{
+  int size, rank;
+  int lockSize = 0;
+  int err;
+
+  host_rank = 0;
+  this->mutex_tag = mutex_tag;
+  this->comm = c;
+
+  MPI_Comm_size(c, &size);
+  MPI_Comm_rank(c, &rank);
+
+  lockArray = 0;
+  if (rank == host_rank)
+    {
+      lockSize = size * sizeof(int);
+      err = MPI_Alloc_mem(lockSize, MPI_INFO_NULL, &lockArray);
+      if (err != MPI_SUCCESS)
+        throw MPI_Exception(err);
+
+      // Nobody holds or waits for the lock initially.
+      for (int slot = 0; slot < size; slot++)
+        lockArray[slot] = 0;
+    }
+
+  err = MPI_Win_create(lockArray, lockSize, sizeof(int), MPI_INFO_NULL, comm, &win);
+  if (err != MPI_SUCCESS)
+    {
+      // Do not leak the table when the window cannot be created.
+      if (lockArray != 0)
+        MPI_Free_mem(lockArray);
+      throw MPI_Exception(err);
+    }
+}
+
+// Free the window first, then the lock table if this task allocated one.
+MPICC_Mutex::~MPICC_Mutex()
+{
+  MPI_Win_free(&win);
+
+  if (lockArray == 0)
+    return;
+
+  MPI_Free_mem(lockArray);
+}
+
+// Acquire the mutex.
+//
+// Under an exclusive lock of the window on host_rank, the flag of this task
+// is set to 1 in the lock table and the flags of all the other tasks are
+// fetched. If any other flag is set, this task blocks in MPI_Recv until a
+// message tagged mutex_tag arrives (release() sends one). The flag of this
+// task stays set while waiting; release() relies on it to find a waiter.
+//
+// Throws MPI_Exception if the window cannot be unlocked. The other MPI
+// return codes are only checked through assert().
+void MPICC_Mutex::acquire()
+{
+  int err;
+  int size, rank;
+
+  MPI_Comm_size(comm, &size);
+  MPI_Comm_rank(comm, &rank);
+
+  // Local copy of the lock table. It is owned by a vector so that it is
+  // released when an MPI_Exception propagates, without any catch/rethrow.
+  std::vector<int> all_locks(size);
+  bool already_locked = false;
+
+  (std::cout << "[" << rank << "] Try to obtain lock" << std::endl).flush();
+  // already_locked is always false when the loop condition is evaluated, so
+  // the body runs exactly once.
+  do {
+    all_locks[rank] = 1;
+    err = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, host_rank, 0, win);
+    assert(err == MPI_SUCCESS);
+
+    // Raise our own flag...
+    err = MPI_Put(all_locks.data()+rank, 1, MPI_INT,
+                  host_rank,
+                  rank, 1, MPI_INT, win);
+    assert(err == MPI_SUCCESS);
+    // ...and fetch the flags of the tasks before and after us.
+    if (rank > 0)
+      {
+        err = MPI_Get(all_locks.data(), rank, MPI_INT,
+                      host_rank,
+                      0, rank, MPI_INT, win);
+        assert(err == MPI_SUCCESS);
+      }
+
+    if (rank < size-1)
+      {
+        err = MPI_Get(all_locks.data()+rank+1, size-rank-1, MPI_INT,
+                      host_rank,
+                      rank+1, size-rank-1, MPI_INT, win);
+        assert(err == MPI_SUCCESS);
+      }
+
+    if ((err = MPI_Win_unlock(host_rank, win)) != MPI_SUCCESS)
+      throw MPI_Exception(err);
+
+    assert(all_locks[rank] == 1);
+
+    already_locked = false;
+    for (int i = 0; i < size; i++)
+      if (i != rank && all_locks[i] != 0)
+        {
+          already_locked = true;
+          break;
+        }
+
+    if (already_locked)
+      {
+        MPI_Status status;
+        int v = 0;
+        (std::cout << "[" << rank << "] Blocking" << std::endl).flush();
+        MPI_Recv(&v, 1, MPI_BYTE, MPI_ANY_SOURCE, mutex_tag, comm, &status);
+        already_locked = false;
+      }
+  } while (already_locked);
+  (std::cout << "[" << rank << "] Obtained lock" << std::endl).flush();
+}
+
+// Release the mutex.
+//
+// Under an exclusive lock of the window on host_rank, the flag of this task
+// is reset to 0 in the lock table and the flags of all the other tasks are
+// fetched. The first task found with its flag set, scanning the ranks
+// cyclically from rank+1, is sent a message tagged mutex_tag, which
+// unblocks its acquire().
+//
+// Throws MPI_Exception if the window cannot be locked or unlocked. The
+// other MPI return codes are only checked through assert().
+void MPICC_Mutex::release()
+{
+  int err;
+  int rank, size;
+
+  MPI_Comm_size(comm, &size);
+  MPI_Comm_rank(comm, &rank);
+
+  // Local copy of the lock table, owned by a vector so that it is released
+  // on every exit path, including the throws below.
+  std::vector<int> all_locks(size);
+  all_locks[rank] = 0;
+
+  if ((err = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, host_rank, 0, win)) != MPI_SUCCESS)
+    throw MPI_Exception(err);
+
+  // Clear our own flag...
+  err = MPI_Put(all_locks.data()+rank, 1, MPI_INT,
+                host_rank,
+                rank, 1, MPI_INT, win);
+  assert(err == MPI_SUCCESS);
+  // ...and fetch the flags of the tasks before and after us.
+  if (rank > 0)
+    {
+      err = MPI_Get(all_locks.data(), rank, MPI_INT,
+                    host_rank,
+                    0, rank, MPI_INT, win);
+      assert(err == MPI_SUCCESS);
+    }
+
+  if (rank < size-1)
+    {
+      err = MPI_Get(all_locks.data()+rank+1, size-rank-1, MPI_INT,
+                    host_rank,
+                    rank+1, size-rank-1, MPI_INT, win);
+      assert(err == MPI_SUCCESS);
+    }
+
+  if ((err = MPI_Win_unlock(host_rank, win)) != MPI_SUCCESS)
+    throw MPI_Exception(err);
+
+  assert(all_locks[rank] == 0);
+
+  // Wake up at most one waiting task.
+  for (int i = 0; i < size; i++)
+    {
+      int p = (rank+i) % size;
+      if (p!= rank && all_locks[p] != 0)
+      {
+        int v = 0;
+        (std::cout << "[" << rank << "] Releasing  " << p << std::endl).flush();
+        MPI_Send(&v, 1, MPI_BYTE, p, mutex_tag, comm);
+        break;
+      }
+    }
+}
+
--- a/libLSS/mpi/real_mpi/mpi_type_translator.hpp
+++ b/libLSS/mpi/real_mpi/mpi_type_translator.hpp
@ -0,0 +1,160 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/mpi/real_mpi/mpi_type_translator.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+
+#ifndef LIBLSS_MPI_TYPE_TRANSLATOR_HPP_INCLUDED
+#define LIBLSS_MPI_TYPE_TRANSLATOR_HPP_INCLUDED
+
+#include <complex>
+#include <iostream>
+#include <cstdlib>
+#include <tuple>
+
+namespace LibLSS
+{
+  // Map the C++ type T to the MPI datatype describing it. Only the
+  // declaration is given here; the supported types are provided as explicit
+  // specializations generated by the MPI_FORCE_* macros.
+  template<typename T>
+  MPI_Datatype translateMPIType();
+
+// Define the specialization translateMPIType<T>() returning the MPI
+// datatype handle `val`.
+#define MPI_FORCE_TYPE(T, val) \
+  template<> \
+  inline MPI_Datatype translateMPIType<T>() \
+  { \
+    return val; \
+  }
+
+// Define the specialization translateMPIType<T>() returning the datatype
+// held by the MPI_CompoundType<T> singleton, i.e. a derived datatype built
+// by the constructor of that singleton.
+#define MPI_FORCE_COMPOUND_TYPE(T) \
+  template<> \
+  inline MPI_Datatype translateMPIType<T>() \
+  { \
+     return MPI_CompoundType<T>::instance().datatype; \
+  }
+
+  // Builtin scalar types and their MPI counterparts.
+  MPI_FORCE_TYPE(int, MPI_INT);
+  MPI_FORCE_TYPE(double, MPI_DOUBLE);
+  MPI_FORCE_TYPE(float, MPI_FLOAT);
+  MPI_FORCE_TYPE(long, MPI_LONG);
+  // NOTE(review): MPI_INT describes an `int`, which is usually larger than a
+  // `bool`; buffers of bool handed to MPI with this mapping look unsafe.
+  // Kept as is because callers may depend on it (e.g. arithmetic
+  // reductions) -- TODO confirm and switch to a boolean MPI datatype.
+  MPI_FORCE_TYPE(bool, MPI_INT);
+  MPI_FORCE_TYPE(unsigned long, MPI_UNSIGNED_LONG);
+  // The unsigned datatype is required: with the signed one, reductions such
+  // as MPI_MAX/MPI_MIN misorder values above the signed range.
+  MPI_FORCE_TYPE(unsigned long long, MPI_UNSIGNED_LONG_LONG);
+  MPI_FORCE_TYPE(unsigned int, MPI_UNSIGNED);
+
+  // Common base of the compound type singletons: it only stores the handle
+  // of the derived MPI datatype.
+  struct MPI_GenericCompoundType {
+    MPI_Datatype datatype;
+
+    // FIXME: the datatype is deliberately not released here. See how to
+    // properly free the type (MPI_Type_free) before MPI_Finalize.
+    ~MPI_GenericCompoundType() {}
+  };
+
+  // Primary template, intentionally empty: only the partial specializations
+  // below (std::complex, std::tuple) know how to build a derived datatype.
+  template<typename T>
+  struct MPI_CompoundType {};
+
+  // Derived MPI datatype for std::complex<T>: two contiguous elements of
+  // the MPI datatype of T. The datatype is created and committed once, when
+  // the singleton is first requested; the process is aborted if the
+  // creation fails.
+  template<typename T> struct MPI_CompoundType<std::complex<T> >: MPI_GenericCompoundType {
+      static MPI_CompoundType<std::complex<T> >& instance() {
+          static MPI_CompoundType<std::complex<T> > variable;
+          return variable;
+      }
+
+      // The constructor is named through the injected class name: naming it
+      // with a template-id is not allowed anymore in C++20.
+      MPI_CompoundType() {
+          (std::cerr << "Creating complex type " << std::endl).flush();
+          int ret = MPI_Type_contiguous(2, translateMPIType<T>(), &datatype);
+
+          if (ret != MPI_SUCCESS) {
+            (std::cerr << "Error while creating types for complexes. Code was " << ret << std::endl).flush();
+            ::abort();
+          }
+          MPI_Type_commit(&datatype);
+      }
+  };
+
+  // Register the complex types: translateMPIType<std::complex<...>>()
+  // returns the datatype of the corresponding MPI_CompoundType singleton.
+  MPI_FORCE_COMPOUND_TYPE(std::complex<float>);
+  MPI_FORCE_COMPOUND_TYPE(std::complex<double>);
+
+  // Only MPI_FORCE_TYPE is retired here; MPI_FORCE_COMPOUND_TYPE stays
+  // defined for the rest of the translation unit.
+  #undef MPI_FORCE_TYPE
+
+  namespace internal_compound_helper {
+    // Compute the byte offset of each element of a std::tuple.
+    //
+    // fill_displacement writes, for every index from 0 to Idx, the address
+    // of the corresponding element of a Tuple imagined at address zero,
+    // i.e. the offset of that element inside the tuple.
+    template <size_t Idx, typename Tuple>
+    struct _offset_helper {
+      static void fill_displacement(MPI_Aint *displ) {
+        Tuple *const origin = static_cast<Tuple *>(nullptr);
+
+        displ[Idx] = reinterpret_cast<ptrdiff_t>(&std::get<Idx>(*origin));
+        _offset_helper<Idx - 1, Tuple>::fill_displacement(displ);
+      }
+    };
+
+    // End of the recursion: offset of the first element.
+    template <typename Tuple>
+    struct _offset_helper<0, Tuple> {
+      static void fill_displacement(MPI_Aint *displ) {
+        Tuple *const origin = static_cast<Tuple *>(nullptr);
+
+        displ[0] = reinterpret_cast<ptrdiff_t>(&std::get<0>(*origin));
+      }
+    };
+  } // namespace internal_compound_helper
+
+  // Derived MPI datatype for std::tuple<Args...>: a struct datatype with one
+  // block of length 1 per tuple element, using the MPI datatype of each
+  // element type and the offset of the element inside the tuple. The
+  // datatype is created and committed once, when the singleton is first
+  // requested; the process is aborted if the creation fails.
+  template <typename... Args>
+  struct MPI_CompoundType<std::tuple<Args...>> : MPI_GenericCompoundType {
+
+    typedef std::tuple<Args...> Tuple;
+
+    static MPI_CompoundType<std::tuple<Args...>> &instance() {
+      static MPI_CompoundType<std::tuple<Args...>> variable;
+      return variable;
+    }
+
+    // The constructor is named through the injected class name: naming it
+    // with a template-id is not allowed anymore in C++20.
+    MPI_CompoundType() {
+      using namespace internal_compound_helper;
+      constexpr size_t N = sizeof...(Args);
+      MPI_Datatype types[N] = {translateMPIType<Args>()...};
+      int len[N];
+      MPI_Aint displacement[N];
+
+      // One element per block. A plain loop avoids relying on <algorithm>,
+      // which this header does not include.
+      for (size_t i = 0; i < N; i++)
+        len[i] = 1;
+      _offset_helper<N - 1, Tuple>::fill_displacement(displacement);
+
+      // MPI_Type_struct is the legacy spelling, kept for old MPI versions.
+#if !defined(MPI_VERSION) || (MPI_VERSION < 3)
+      int ret = MPI_Type_struct(N, len, displacement, types, &datatype);
+#else
+      int ret = MPI_Type_create_struct(N, len, displacement, types, &datatype);
+#endif
+
+      if (ret != MPI_SUCCESS) {
+        (std::cerr
+         << "Error while creating types for tuple compound type. Code was "
+         << ret << std::endl)
+            .flush();
+        ::abort();
+      }
+      MPI_Type_commit(&datatype);
+    }
+  };
+
+   
+  // Derived MPI datatype for a fixed-size vector: Dim contiguous elements
+  // of the MPI datatype of BaseType. The datatype is created and committed
+  // once, when the singleton is first requested; the process is aborted,
+  // without any message, if the creation fails.
+  template<typename BaseType, size_t Dim>
+  struct mpiVectorType {
+    typedef mpiVectorType<BaseType, Dim> Self;
+    MPI_Datatype datatype;
+
+    mpiVectorType() {
+      MPI_Datatype const base = translateMPIType<BaseType>();
+
+      if (MPI_Type_contiguous(Dim, base, &datatype) != MPI_SUCCESS)
+        ::abort();
+
+      MPI_Type_commit(&datatype);
+    }
+
+    // Unique instance for this (BaseType, Dim) pair.
+    static Self& instance() {
+      static Self variable;
+      return variable;
+    }
+
+    // Handle of the committed datatype.
+    inline MPI_Datatype type() const { return datatype; }
+  };
+
+};
+
+#endif // MPI_TYPE_TRANSLATOR_HPP_INCLUDED
--- a/libLSS/physics/class_cosmo.cpp
+++ b/libLSS/physics/class_cosmo.cpp
@ -0,0 +1,399 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/class_cosmo.cpp
+    Copyright (C) 2020 Jens Jasche <jens.jasche@fysik.su.se>
+    Copyright (C) 2021 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include <fstream>
+#include <locale.h>
+#include <boost/algorithm/string/trim.hpp>
+#include "libLSS/tools/console.hpp"
+#include "class_cosmo.hpp"
+#include <class_code/class.h>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/tools/string_tools.hpp"
+#include "libLSS/tools/auto_interpolator.hpp"
+
+using namespace LibLSS;
+using namespace std;
+
+namespace LibLSS {
+  /**
+   * Private state of ClassCosmo: the CLASS structures plus the bookkeeping
+   * needed to release them.
+   *
+   * Each `*_init` flag is raised by ClassCosmo once the matching CLASS
+   * module has been initialised; the destructor only calls the matching
+   * `*_free` routine for modules whose flag is set.
+   */
+  struct OpaqueClass {
+    struct precision pr;  // for precision parameters
+    struct background ba; // for cosmological background
+    struct thermo th;     // for thermodynamics
+    struct perturbs pt;   // for source functions
+    struct transfers tr;  // for transfer functions
+    struct primordial pm; // for primordial spectra
+    struct spectra sp;    // for output spectra
+    struct nonlinear nl;  // for non-linear spectra
+    struct lensing le;    // for lensed spectra
+    struct output op;     // for output files
+    ErrorMsg errmsg;      // for error messages
+
+    // Which CLASS modules have been successfully initialised so far.
+    bool bg_init = false;
+    bool th_init = false;
+    bool pt_init = false;
+    bool prim_init = false;
+
+    // Interpolation table built by ClassCosmo::reinterpolate.
+    LibLSS::auto_interpolator<double> interpolate_mTk;
+
+    // N_ncdm doubles as the "Omega0_ncdm is allocated" marker read by the
+    // destructor, so it must start at zero.
+    OpaqueClass() { ba.N_ncdm = 0; }
+
+    ~OpaqueClass() {
+      // Omega0_ncdm is allocated with new[] by the ClassCosmo constructor.
+      // NOTE(review): confirm that background_free() of the CLASS version in
+      // use does not release this array itself.
+      if (ba.N_ncdm > 0) {
+        delete[] ba.Omega0_ncdm;
+      }
+      if (bg_init) {
+        background_free(&ba);
+      }
+      if (th_init) {
+        thermodynamics_free(&th);
+      }
+      if (pt_init) {
+        perturb_free(&pt);
+      }
+      if (prim_init) {
+        primordial_free(&pm);
+      }
+    }
+  };
+} // namespace LibLSS
+
+/**
+ * Build a CLASS run for the given cosmology and tabulate its transfer
+ * function.
+ *
+ * Steps: load the CLASS default parameters, override the background
+ * densities from `cosmo`, then run the background, thermodynamics,
+ * perturbation and primordial modules and finally call retrieve_Tk().
+ *
+ * The LC_NUMERIC locale is forced to "C" for the duration of the call and
+ * restored on every exit path, including exceptions of any type.
+ *
+ * @param cosmo cosmological parameters (h, omega_m, omega_b, omega_q,
+ *              omega_k, w, wprime and sum_mnu are read).
+ * @throws ErrorBadState (through error_helper) if any CLASS stage fails.
+ */
+ClassCosmo::ClassCosmo(CosmologicalParameters const &cosmo) {
+  LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
+
+  numInterpolationPoints = 1024;
+  opaque = std::make_unique<OpaqueClass>();
+
+  // setlocale() may return a null pointer; building a std::string from it
+  // would be undefined behaviour, so remember whether there is anything to
+  // restore.
+  char const *current_locale = setlocale(LC_NUMERIC, 0);
+  bool const has_previous_locale = (current_locale != nullptr);
+  std::string const previous_locale =
+      has_previous_locale ? current_locale : "";
+  auto restore_locale = [&]() {
+    if (has_previous_locale)
+      setlocale(LC_NUMERIC, previous_locale.c_str());
+  };
+
+  // CLASS is not safe w.r.t Locale settings. It reads table with sscanf which
+  // is sensitive to the locale setup.
+  setlocale(LC_NUMERIC, "C");
+
+  try {
+    // Set all class values to default
+    if (input_default_params(
+            &opaque->ba, &opaque->th, &opaque->pt, &opaque->tr, &opaque->pm,
+            &opaque->sp, &opaque->nl, &opaque->le, &opaque->op) == _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running input_default_params => %s", opaque->op.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+
+    {
+      auto &pba = opaque->ba;
+      double
+          sigma_B; /* Stefan-Boltzmann constant in \f$ W/m^2/K^4 = Kg/K^4/s^3 \f$*/
+
+      sigma_B =
+          2. * pow(_PI_, 5) * pow(_k_B_, 4) / 15. / pow(_h_P_, 3) / pow(_c_, 2);
+
+      double omega_cdm = cosmo.omega_m - cosmo.omega_b;
+      // Running sum of the density parameters set below; Omega0_lambda is
+      // what remains once curvature is also removed.
+      double Omega_tot = 0;
+
+      pba.h = cosmo.h;
+      pba.H0 = pba.h * 1.e5 / _c_;
+      // T_cmb is whatever input_default_params() left in the structure.
+      pba.Omega0_g = (4. * sigma_B / _c_ * pow(pba.T_cmb, 4.)) /
+                     (3. * _c_ * _c_ * 1.e10 * pba.h * pba.h / _Mpc_over_m_ /
+                      _Mpc_over_m_ / 8. / _PI_ / _G_);
+      Omega_tot += pba.Omega0_g;
+      pba.Omega0_ur = 3.046 * 7. / 8. * pow(4. / 11., 4. / 3.) * pba.Omega0_g;
+      Omega_tot += pba.Omega0_ur;
+      pba.Omega0_idr = 0.0;
+      Omega_tot += pba.Omega0_idr;
+      pba.Omega0_idm_dr = 0.0;
+      pba.T_idr = 0.0;
+      pba.Omega0_b = cosmo.omega_b;
+      Omega_tot += pba.Omega0_b;
+      pba.Omega0_cdm = omega_cdm;
+      Omega_tot += pba.Omega0_cdm;
+
+      {
+        // CLP parametrization
+        pba.fluid_equation_of_state = CLP;
+        pba.w0_fld = cosmo.w;
+        pba.wa_fld = cosmo.wprime;
+        pba.Omega0_fld = cosmo.omega_q;
+        Omega_tot += pba.Omega0_fld;
+      }
+      pba.Omega0_k = cosmo.omega_k;
+
+      // Ownership of this array is tracked through N_ncdm: ~OpaqueClass
+      // releases it with delete[] when N_ncdm > 0.
+      pba.N_ncdm = 1;
+      pba.Omega0_ncdm = new double[1];
+
+      // NOTE(review): a sum of neutrino masses is stored in a density
+      // parameter slot and is not added to Omega_tot -- confirm intent.
+      pba.Omega0_ncdm[0] = cosmo.sum_mnu;
+
+      opaque->pt.alpha_idm_dr = nullptr;
+      opaque->pt.beta_idr = nullptr;
+
+      pba.Omega0_lambda = 1 - pba.Omega0_k - Omega_tot;
+
+      pba.K = -pba.Omega0_k * pow(pba.a_today * pba.H0, 2);
+      /** - Set curvature sign */
+      if (pba.K > 0.)
+        pba.sgnK = 1;
+      else if (pba.K < 0.)
+        pba.sgnK = -1;
+    }
+
+    // Set all class precision values to default
+    if (input_default_precision(&opaque->pr) == _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running input_default_precision => %s",
+          opaque->pr.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    opaque->pr.k_per_decade_for_pk = 30;
+
+    //initialize background calculations
+    if (background_init(&opaque->pr, &opaque->ba) == _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running background_init => %s", opaque->ba.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    opaque->bg_init = true;
+
+    //opaque->th.thermodynamics_verbose = _TRUE_;
+    if (thermodynamics_init(&opaque->pr, &opaque->ba, &opaque->th) ==
+        _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running thermodynamics_init => %s", opaque->th.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    opaque->th_init = true;
+
+    opaque->pt.has_perturbations = _TRUE_;
+    //opaque->pt.perturbations_verbose = 1;
+    opaque->pt.has_pk_matter = _TRUE_;
+    opaque->pt.has_density_transfers = _TRUE_;
+    opaque->pt.has_cls = _FALSE_;
+    //opaque->pt.k_max_for_pk = ;
+
+    if (perturb_init(&opaque->pr, &opaque->ba, &opaque->th, &opaque->pt) ==
+        _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running perturb_init => %s", opaque->pt.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    opaque->pt_init = true;
+
+    if (primordial_init(&opaque->pr, &opaque->pt, &opaque->pm) == _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running primordial_init => %s", opaque->pm.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    opaque->prim_init = true;
+
+    retrieve_Tk();
+  } catch (...) {
+    // Restore the locale whatever the exception type is, then propagate.
+    restore_locale();
+    throw;
+  }
+  restore_locale();
+}
+
+/**
+ * Evaluate the CLASS primordial spectrum at wavenumber k.
+ *
+ * @param k wavenumber in 1/Mpc (linear mode).
+ * @return primordial spectrum P(k) in \f$Mpc^3\f$ (linear mode).
+ * @throws ErrorBadState (through error_helper) if CLASS reports a failure,
+ *         instead of returning an uninitialised value.
+ */
+double ClassCosmo::primordial_Pk(double k) {
+  //Input: wavenumber k in 1/Mpc (linear mode)
+  //Output: primordial spectrum P(k) in \f$Mpc^3\f$ (linear mode)
+
+  // NOTE(review): CLASS writes one value per pair of initial conditions into
+  // `output`; a single double assumes only one such pair is active -- confirm.
+  double output = 0;
+
+  if (primordial_spectrum_at_k(
+          &opaque->pm,
+          0, //choose scalar mode
+          linear, k, &output) == _FAILURE_) {
+    error_helper<ErrorBadState>(
+        std::string("Error in CLASS running primordial_spectrum_at_k => ") +
+        opaque->pm.error_message);
+  }
+
+  return output;
+}
+
+/**
+ * Transfer function at wavenumber k, read from the tabulated interpolator.
+ *
+ * The table stores log(-T) as a function of log(k) (see reinterpolate), so
+ * the lookup is exponentiated and the sign restored.
+ */
+double ClassCosmo::get_Tk(double k) {
+  double const log_k = std::log(k);
+  double const log_minus_Tk = opaque->interpolate_mTk(log_k);
+
+  return -std::exp(log_minus_Tk);
+}
+
+/**
+ * Extract the total-matter transfer function from CLASS and tabulate it.
+ *
+ * Queries the column titles of the CLASS perturbation output, pulls the
+ * output data at redshift 0, picks the "k (h/Mpc)" and "d_tot" columns of
+ * the adiabatic initial condition and hands (k, -d_tot) to reinterpolate().
+ *
+ * All temporary buffers are owned by smart pointers so that they are
+ * released on every exit path, including failed assertions and exceptions
+ * thrown by reinterpolate().
+ *
+ * @throws ErrorBadState (through error_helper) if a CLASS call fails.
+ */
+void ClassCosmo::retrieve_Tk() {
+  LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
+
+  std::string titles;
+  double const output_redshift = 0;
+
+  // Query the available columns
+  {
+    // Value-initialised, i.e. zero-filled, so the result is always
+    // NUL-terminated.
+    std::unique_ptr<char[]> c_titles(new char[_MAXTITLESTRINGLENGTH_]());
+    if (perturb_output_titles(
+            &opaque->ba, &opaque->pt, class_format, c_titles.get()) ==
+        _FAILURE_) {
+      ctx.format2<LOG_ERROR>(
+          "Error running perturb_output_titles => %s",
+          opaque->pt.error_message);
+      error_helper<ErrorBadState>("Error in CLASS");
+    }
+    titles = c_titles.get();
+  }
+
+  // Retrieve relevant data
+  auto names = LibLSS::tokenize(boost::algorithm::trim_copy(titles), "\t");
+  ctx.print(LibLSS::to_string(names));
+  auto index_md = opaque->pt.index_md_scalars;
+  auto number_of_titles = names.size();
+  auto timesteps = opaque->pt.k_size[index_md];
+  // Number of values stored per initial condition.
+  auto size_ic_data = timesteps * number_of_titles;
+  auto ic_num = opaque->pt.ic_size[index_md];
+
+  std::unique_ptr<double[]> data(new double[size_ic_data * ic_num]);
+
+  if (perturb_output_data(
+          &opaque->ba, &opaque->pt, class_format, output_redshift,
+          number_of_titles, data.get()) == _FAILURE_) {
+    ctx.format2<LOG_ERROR>(
+        "Error running perturb_output_data => %s", opaque->pt.error_message);
+    error_helper<ErrorBadState>("Error in CLASS");
+  }
+
+  // Adiabatic mode is referenced at opaque->pt.index_ic_ad
+  auto index_ic = opaque->pt.index_ic_ad;
+  auto result_k = std::find(names.begin(), names.end(), "k (h/Mpc)");
+  Console::instance().c_assert(
+      result_k != names.end(), "Invalid returned arrays for k from CLASS");
+  auto k_title = std::distance(names.begin(), result_k);
+  auto result = std::find(names.begin(), names.end(), "d_tot");
+  Console::instance().c_assert(
+      result != names.end(), "Invalid returned arrays from CLASS");
+  auto mTk_title = std::distance(names.begin(), result);
+
+  ctx.format("k_title=%d, mTk_title=%d", k_title, mTk_title);
+
+  // Layout used by this function: [initial condition][k step][column].
+  auto get_data = [&](size_t step, size_t title) {
+    return data[index_ic * size_ic_data + step * number_of_titles + title];
+  };
+
+  array_1d k, Tk;
+
+  k.resize(boost::extents[timesteps]);
+  Tk.resize(boost::extents[timesteps]);
+
+  size_t const num_steps = static_cast<size_t>(timesteps);
+  for (size_t step = 0; step < num_steps; step++) {
+    Tk[step] = -get_data(
+        step, mTk_title); // Laplacian between density and potential is negative
+    k[step] = get_data(step, k_title);
+  }
+
+  reinterpolate(k, Tk);
+}
+
+// Defined in this translation unit, where OpaqueClass is a complete type
+// (the header only forward-declares it for the std::unique_ptr member).
+ClassCosmo::~ClassCosmo() {}
+
+/**
+ * Resample (k, Tk) onto a regular grid in log(k) and install the result as
+ * the interpolator used by get_Tk().
+ *
+ * The grid has numInterpolationPoints nodes starting at log(k_min), where
+ * k_min and k_max are the CLASS perturbation limits divided by h. Each node
+ * stores log(Tk), obtained by linear interpolation in (log k, log Tk)
+ * between the two bracketing input samples.
+ *
+ * @param k  input wavenumbers, expected in increasing order.
+ * @param Tk input transfer values; their logarithm is taken, so they are
+ *           expected to be positive.
+ */
+void ClassCosmo::reinterpolate(array_ref_1d const &k, array_ref_1d const &Tk) {
+  LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
+
+  double k_min = opaque->pt.k_min / opaque->ba.h;
+  double k_max = opaque->pt.k_max / opaque->ba.h;
+
+  double delta_ln_k = std::log(k_max / k_min) / (numInterpolationPoints + 1);
+  double log_k_min = std::log(k_min);
+  double log_k_max = std::log(k_max);
+  size_t i_in_k = 0;
+
+  // Owned here until handed over to the interpolator, so that a failed
+  // assertion below does not leak the table.
+  std::unique_ptr<boost::multi_array<double, 1>> newTk(
+      new boost::multi_array<double, 1>(
+          boost::extents[numInterpolationPoints]));
+
+  ctx.format(
+      "numInterpolationPoints = %d, k.size() = %d, k_min=%g, k_max=%g",
+      numInterpolationPoints, k.size(), k_min, k_max);
+  for (size_t i = 0; i < numInterpolationPoints; i++) {
+    double target_k = std::exp(delta_ln_k * i + log_k_min);
+    // Bounds check first: k[i_in_k] must not be read past the end.
+    while (i_in_k < k.size() && k[i_in_k] < target_k) {
+      i_in_k++;
+    }
+
+    Console::instance().c_assert(i_in_k < k.size(), "Bad reinterpolation");
+    if (i_in_k == 0) {
+      // No sample below the target: there is no k[i_in_k - 1] to bracket
+      // with, so use the first tabulated value.
+      (*newTk)[i] = std::log(Tk[0]);
+    } else if (k[i_in_k - 1] == 0) {
+      (*newTk)[i] =
+          std::log(Tk[i_in_k]) / std::log(k[i_in_k]) * std::log(target_k);
+    } else {
+      double alpha = std::log(target_k / k[i_in_k - 1]) /
+                     std::log(k[i_in_k] / k[i_in_k - 1]);
+      // alpha == 1 happens when the target falls exactly on an input sample
+      // and is a valid interpolation weight.
+      Console::instance().c_assert(
+          alpha > 0 && alpha <= 1, "Bad alpha for interpolation");
+      (*newTk)[i] =
+          std::log(Tk[i_in_k - 1]) * (1 - alpha) + std::log(Tk[i_in_k]) * alpha;
+    }
+  }
+
+  // The table pointer is released to the interpolator here, as the original
+  // raw pointer was.
+  double const log_Tk_first = std::log(Tk[0]);
+  opaque->interpolate_mTk = LibLSS::auto_interpolator<double>(
+      log_k_min, log_k_max, delta_ln_k, log_Tk_first, 0.0, newTk.release());
+  opaque->interpolate_mTk.setThrowOnOverflow();
+}
+
+/**
+ * Overwrite the CLASS background structure with a fixed set of parameters.
+ *
+ * Only the current value of h is kept; every other assignment below is a
+ * hard-coded constant or derived from already-stored densities. No CLASS
+ * module is re-run by this function.
+ */
+void ClassCosmo::updateCosmo() {
+  //ba.h = 0.67556;
+  auto &bg = opaque->ba;
+  double const h2 = pow(bg.h, 2);
+
+  bg.H0 = bg.h * 1.e5 / _c_;
+  bg.T_cmb = 2.7255;
+
+  // Matter content.
+  bg.Omega0_b = 0.022032 / h2;
+  bg.Omega0_cdm = 0.12038 / h2;
+  bg.Omega0_dcdmdr = 0.0;
+  bg.Omega0_dcdm = 0.0;
+  bg.Gamma_dcdm = 0.0;
+
+  // Non-cold dark matter is switched off.
+  // NOTE(review): Omega0_ncdm is neither released nor reset here, and
+  // ~OpaqueClass only deletes it when N_ncdm > 0 -- looks like a leak.
+  bg.N_ncdm = 0;
+  bg.Omega0_ncdm_tot = 0.;
+  bg.ksi_ncdm_default = 0.;
+  bg.ksi_ncdm = NULL;
+
+  // Scalar field defaults
+  bg.Omega0_scf = 0.;
+  bg.attractor_ic_scf = _TRUE_;
+  bg.scf_parameters = NULL;
+  bg.scf_parameters_size = 0;
+  bg.scf_tuning_index = 0;
+
+  // Flat geometry.
+  bg.Omega0_k = 0.;
+  bg.K = 0.;
+  bg.sgnK = 0;
+
+  // The cosmological constant takes whatever the other components leave.
+  bg.Omega0_lambda = 1. - bg.Omega0_k - bg.Omega0_g - bg.Omega0_ur -
+                     bg.Omega0_b - bg.Omega0_cdm - bg.Omega0_ncdm_tot -
+                     bg.Omega0_dcdmdr - bg.Omega0_idm_dr - bg.Omega0_idr;
+
+  // Dark-energy fluid is switched off.
+  bg.Omega0_fld = 0.;
+  bg.w0_fld = -1.;
+  bg.wa_fld = 0.;
+  bg.Omega_EDE = 0.;
+  bg.cs2_fld = 1.;
+
+  bg.shooting_failed = _FALSE_;
+}
+
+/**
+ * Export a few CLASS background quantities as a name -> value dictionary.
+ *
+ * Keys: "Omega_g", "Omega_m", "N_ncdm", "Omega_k", "Omega_lambda", and one
+ * "Omega0_ncdm_<i>" entry per non-cold dark matter species. The per-species
+ * entries are only produced when N_ncdm > 0 and the array is allocated, so
+ * the array is never read when no species is declared.
+ */
+ClassCosmo::DictCosmology ClassCosmo::getCosmology() {
+
+  DictCosmology state;
+  auto const &ba = opaque->ba;
+
+  state["Omega_g"] = ba.Omega0_g;
+  state["Omega_m"] = ba.Omega0_m;
+  state["N_ncdm"] = ba.N_ncdm;
+  if (ba.Omega0_ncdm != nullptr) {
+    for (int i = 0; i < ba.N_ncdm; i++) {
+      state[lssfmt::format("Omega0_ncdm_%d", i)] = ba.Omega0_ncdm[i];
+    }
+  }
+  state["Omega_k"] = ba.Omega0_k;
+  state["Omega_lambda"] = ba.Omega0_lambda;
+
+  return state;
+}
+
+// Set the number of nodes used by reinterpolate() for the log(k) table.
+// The constructor has already built a table with 1024 nodes, so the new
+// value only takes effect the next time retrieve_Tk() is called.
+void ClassCosmo::setInterpolation(size_t numPoints) {
+  numInterpolationPoints = numPoints;
+}
+
+// ARES TAG: num_authors = 2
+// ARES TAG: name(0) = Jens Jasche
+// ARES TAG: email(0) = jens.jasche@fysik.su.se
+// ARES TAG: year(0) = 2020
+// ARES TAG: name(1) = Guilhem Lavaux
+// ARES TAG: email(1) = guilhem.lavaux@iap.fr
+// ARES TAG: year(1) = 2021
--- a/libLSS/physics/class_cosmo.hpp
+++ b/libLSS/physics/class_cosmo.hpp
@ -0,0 +1,55 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/class_cosmo.hpp
+    Copyright (C) 2020 Jens Jasche <jens.jasche@fysik.su.se>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_CLASS_COSMO_HPP
+#  define __LIBLSS_CLASS_COSMO_HPP
+
+#  include <map>
+#  include <string>
+#  include <memory>
+#  include "libLSS/physics/cosmo.hpp"
+
+namespace LibLSS {
+
+  struct OpaqueClass;
+
+  /**
+   * Thin wrapper around the CLASS Boltzmann code.
+   *
+   * All CLASS structures live behind the opaque pointer so that the CLASS
+   * headers are only needed by the implementation file.
+   */
+  class ClassCosmo {
+  private:
+    // CLASS state; OpaqueClass is defined in class_cosmo.cpp.
+    std::unique_ptr<OpaqueClass> opaque;
+    typedef boost::multi_array_ref<double, 1> array_ref_1d;
+    typedef boost::multi_array<double, 1> array_1d;
+
+    // Number of nodes of the log(k) table built by reinterpolate().
+    size_t numInterpolationPoints;
+
+  public:
+    // Name -> value dictionary returned by getCosmology().
+    typedef std::map<std::string, double> DictCosmology;
+
+    // Runs the CLASS modules for `params` and tabulates the transfer function.
+    ClassCosmo(CosmologicalParameters const &params);
+    ~ClassCosmo();
+
+    // Change the table size; takes effect at the next retrieve_Tk() call.
+    void setInterpolation(size_t numPoints);
+
+    // Primordial spectrum at wavenumber k, as returned by CLASS.
+    double primordial_Pk(double k);
+    // Overwrite the CLASS background structure with built-in values.
+    void updateCosmo();
+    // Transfer function at wavenumber k, from the interpolation table.
+    double get_Tk(double k);
+    // Pull the transfer function out of CLASS and rebuild the table.
+    void retrieve_Tk();
+    // Snapshot of a few background density parameters.
+    DictCosmology getCosmology();
+
+  protected:
+    // Resample (k, Tk) on a regular log(k) grid and install the interpolator.
+    void reinterpolate(array_ref_1d const &k, array_ref_1d const &Tk);
+  };
+
+} // namespace LibLSS
+
+#endif
+
+// ARES TAG: num_authors = 1
+// ARES TAG: name(0) = Jens Jasche
+// ARES TAG: email(0) = jens.jasche@fysik.su.se
+// ARES TAG: year(0) = 2020
--- a/libLSS/physics/classic_cic.hpp
+++ b/libLSS/physics/classic_cic.hpp
--- a/libLSS/physics/classic_gpot.hpp
+++ b/libLSS/physics/classic_gpot.hpp
@ -0,0 +1,71 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/classic_gpot.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_PHYSICS_CLASSIC_GPOT_HPP
+#define __LIBLSS_PHYSICS_CLASSIC_GPOT_HPP
+
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include <boost/multi_array.hpp>
+
+using namespace LibLSS;
+typedef boost::multi_array_types::extent_range range;
+using boost::extents;
+using boost::format;
+
+
+namespace LibLSS {
+
+    // Gravitational potential from a density field, solved in Fourier space
+    // with a sine-squared Green's function.
+    //
+    // NOTE(review): the body uses analysis_plan, synthesis_plan, AUX0,
+    // startN0, localN0, N2_HC, kmode and MFCalls, none of which is declared
+    // in the visible part of this header -- confirm where they are expected
+    // to come from before relying on this code.
+    template<typename T>
+    struct ClassicGravitationalPotential {
+        typedef T Type;
+        typedef boost::multi_array<T, 3> DensityArray;
+        typedef boost::multi_array<T, 3> GravityArray;
+        
+        // dens: input density; pot: output potential; Om: matter density
+        // parameter entering the 3/2*Om normalisation; L0..L2 and N0..N2 are
+        // passed to kmode() per axis (presumably box lengths and grid sizes --
+        // TODO confirm).
+        template<typename PotentialArray>
+        static void potential(const PotentialArray& dens, PotentialArray& pot, T Om, T L0, T L1, T L2, 
+                               int N0, int N1, int N2) {
+            ConsoleContext<LOG_DEBUG> ctx("Classic GravitationalPotential estimation");
+            
+            //transform density to F-space
+            MFCalls::execute_r2c(analysis_plan, dens.data(), AUX0.data());
+            
+            double normphi=3./2.*Om;
+
+            // NOTE(review): the outer loop starts at 0 rather than startN0 --
+            // confirm this is intended for a distributed slab.
+            #pragma omp parallel for
+            for (int i=0 ; i<startN0+localN0;i++)
+                for (int j=0 ; j<N1;j++)
+                    for (int k=0; k<N2_HC;k++)
+                    {
+                        double kk[3];
+                        kk[0]=kmode(i,N0,L0);
+                        kk[1]=kmode(j,N1,L1);
+                        kk[2]=kmode(k,N2,L2);
+                    
+                        double sin20 = sin(kk[0]/2.)*sin(kk[0]/2.);
+			            double sin21 = sin(kk[1]/2.)*sin(kk[1]/2.);
+			            double sin22 = sin(kk[2]/2.)*sin(kk[2]/2.);	
+
+                        // Divides by zero at the zero mode; that element is
+                        // overwritten after the loop.
+			            double Greens = - normphi/4./(sin20+sin21+sin22);
+		  
+                        AUX0[i][j][k] *= Greens;                        
+	                }
+	        //fix zero mode by hand
+	        if (startN0 == 0 && localN0 > 0) {
+            AUX0[0][0][0]=0;
+            }
+           
+            MFCalls::execute_c2r(synthesis_plan, AUX0.data(), pot.data());            
+        }
+    };
+    
+    
+}
+
+#endif
--- a/libLSS/physics/cosmo.cpp
+++ b/libLSS/physics/cosmo.cpp
@ -0,0 +1,482 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/cosmo.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <gsl/gsl_const_num.h>
+#include <gsl/gsl_const_mksa.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_odeiv.h>
+#include <gsl/gsl_linalg.h>
+#include <gsl/gsl_integration.h>
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include "cosmo.hpp"
+#include "libLSS/tools/gsl_error.hpp"
+#include "libLSS/tools/gslIntegrate.hpp"
+
+using namespace LibLSS;
+using namespace std;
+using CosmoTool::spower;
+using CosmoTool::square;
+
+#define epsabs COSMO_EPS
+#define epsrel COSMO_EPS
+
+static const int NEVAL = 1000;
+static const double cosmo_Ggrav = GSL_CONST_MKSA_GRAVITATIONAL_CONSTANT;
+static const double cosmo_clight =
+    GSL_CONST_MKSA_SPEED_OF_LIGHT; /* speed of light in m/s */
+static const double cosmo_kB =
+    GSL_CONST_MKSA_BOLTZMANN; /* Boltzmann constant  in m^2 * kg /s^2 /K */
+static const double cosmo_parsec = GSL_CONST_MKSA_PARSEC; /* parsec in m */
+static const double cosmo_kparsec = (1.0e3 * cosmo_parsec);
+static const double cosmo_mparsec = (1.0e6 * cosmo_parsec);
+static const double cosmo_gparsec = (1.0e9 * cosmo_parsec);
+static const double cosmo_hubble =
+    (1.0e5 / cosmo_mparsec); /* Hubble constant in 1/s */
+static const double cosmo_mp =
+    GSL_CONST_MKSA_MASS_PROTON; /* Mass of proton kg */
+
+static const double cosmo_Mpc_cm = (1.0e2 * cosmo_mparsec);  // cm
+static const double cosmo_Mpc_m = (1.0e0 * cosmo_mparsec);   // m
+static const double cosmo_Mpc_km = (1.0e-3 * cosmo_mparsec); // km
+static const double cosmo_H100_s = (100. / cosmo_Mpc_km);    // s^-1
+
+static const double cosmo_M_sun = (1.98892e33); // g
+
+static const double cosmo_G_const_Mpc_Msun_s =
+    (cosmo_M_sun * (6.673e-8) / cosmo_Mpc_cm / cosmo_Mpc_cm /
+     cosmo_Mpc_cm); // Mpc^3 msun^-1 s^-2
+
+static const double AMIN = 1e-6;
+
+static double x_plus(double a, const CosmologicalParameters &p);
+
+/* --- function w_Q [dark energy equation-of-state parameter at scale factor a:
+       linear in (1 - a), with value p.w at a = 1 and slope p.wprime] --- */
+static double w_Q(double a, const CosmologicalParameters &p) {
+  const double evolving_part = p.wprime * (1.0 - a);
+  return p.w + evolving_part;
+}
+
+/* --- function aux_q [coefficient of y' in the growth equation integrated by
+       d_plus_function: y'' = aux_r * y - aux_q * y'] --- */
+static double aux_q(double a, const CosmologicalParameters &p) {
+  const double bracket = 1.0 - w_Q(a, p) / (1.0 + x_plus(a, p));
+  return 3.0 / 2.0 * bracket / a;
+}
+
+/* --- function x_plus [auxiliary function, see Linder+Jenkins MNRAS 346, 573-583 (2003) for definition] --- */
+static double x_plus(double a, const CosmologicalParameters &p) {
+  const double density_ratio = p.omega_m / (1 - p.omega_m);
+  const double power_law = pow(a, 3 * (p.w + p.wprime));
+  const double exponential = exp(3 * p.wprime * (1 - a));
+
+  return density_ratio * power_law * exponential;
+}
+
+/* --- function x_plus_prime [derivative of x_plus, x_plus_prime(a) = d(x_plus(a))/da] --- */
+double x_plus_prime(double a, const CosmologicalParameters &p) {
+  const double x = x_plus(a, p);
+  const double w = w_Q(a, p);
+
+  return 3 * x * w / a;
+}
+
+/* --- function aux_r [coefficient of y in the growth equation integrated by
+       d_plus_function: y'' = aux_r * y - aux_q * y'] --- */
+static double aux_r(double a, const CosmologicalParameters &p) {
+  const double x = x_plus(a, p);
+  const double a_squared = spower<2, double>(a);
+
+  return 3.0 / 2.0 * x / (1.0 + x) / a_squared;
+}
+
+/* --- function aux_dr [derivative of aux_r, d(aux_r(a))/da] --- */
+static double aux_dr(double a, const CosmologicalParameters &p) {
+  const double r = aux_r(a, p);
+  const double log_slope = x_plus_prime(a, p) / (1.0 + x_plus(a, p));
+
+  return log_slope * (3.0 / 2.0 / spower<2, double>(a) - r) - 2.0 * r / a;
+}
+
+/* --- function aux_dq [derivative of aux_q, d(aux_q(a))/da] --- */
+static double aux_dq(double a, const CosmologicalParameters &p) {
+  // aux_q(a) = Q(a) / a with Q(a) = 3/2 * (1 - w_Q(a) / (1 + x_plus(a))),
+  // hence d(aux_q)/da = (dQ/da - aux_q) / a.
+  //
+  // The previous implementation divided both terms by an extra factor of a
+  // and used +wprime for dw/da.
+  const double xp = 1.0 + x_plus(a, p);
+
+  // w_Q(a) = w + wprime * (1 - a)  =>  dw/da = -wprime.
+  const double dw_da = -p.wprime;
+
+  // dQ/da = -3/2 * d/da [ w / xp ]
+  //       = -3/2 / xp * (dw/da - w * x_plus' / xp)
+  const double dQ_da =
+      -3.0 / 2.0 / xp * (dw_da - w_Q(a, p) * x_plus_prime(a, p) / xp);
+
+  return (dQ_da - aux_q(a, p)) / a;
+}
+
+/* --- function d_plus_function - right-hand side of the growth ODE written as
+       a first-order system: f0 = dy0/da = y1, f1 = dy1/da --- */
+static int d_plus_function(double t, const double y[], double f[], void *data) {
+  const CosmologicalParameters &cosmo =
+      *static_cast<CosmologicalParameters *>(data);
+
+  /* derivatives f_i = dy_i/dt */
+  const double growth = y[0];
+  const double growth_rate = y[1];
+
+  f[0] = growth_rate;
+  f[1] = aux_r(t, cosmo) * growth - aux_q(t, cosmo) * growth_rate;
+
+  return GSL_SUCCESS;
+}
+
+/* --- function d_plus_jacobian - Jacobian of the system defined by
+       d_plus_function, in the layout expected by gsl_odeiv_system:
+       dfdy is the row-major 2x2 matrix df_i/dy_j and dfdt holds the explicit
+       time derivatives df_i/dt --- */
+static int d_plus_jacobian(
+    double t, const double y[], double *dfdy, double dfdt[], void *data) {
+  gsl_matrix_view dfdy_mat = gsl_matrix_view_array(dfdy, 2, 2);
+  gsl_matrix *m = &dfdy_mat.matrix;
+
+  CosmologicalParameters *params = (CosmologicalParameters *)data;
+
+  /* jacobian df_i(t,y(t)) / dy_j, with
+       f0 = y1
+       f1 = aux_r(t) * y0 - aux_q(t) * y1 */
+  gsl_matrix_set(m, 0, 0, 0.0);
+  gsl_matrix_set(m, 0, 1, 1.0);
+  /* df1/dy0 is aux_r(t), not zero */
+  gsl_matrix_set(m, 1, 0, aux_r(t, *params));
+  gsl_matrix_set(m, 1, 1, -aux_q(t, *params));
+
+  /* gradient df_i/dt, explicit dependence */
+  dfdt[0] = 0.0;
+  dfdt[1] = aux_dr(t, *params) * y[0] - aux_dq(t, *params) * y[1];
+
+  return GSL_SUCCESS;
+}
+
+/* --- function hubble [expansion rate at scale factor a: p.h * 100 times the
+       square root of the summed radiation, matter, curvature and dark-energy
+       terms; presumably in km/s/Mpc] --- */
+static double hubble(double a, const CosmologicalParameters &p) {
+  using CosmoTool::spower;
+
+  // Exponent (up to the factor 3) of the dark-energy density evolution.
+  const double de_exponent =
+      -(1 + p.w + p.wprime) * log(a) + p.wprime * (a - 1);
+
+  double e_squared = p.omega_r / spower<4, double>(a);
+  e_squared += p.omega_m / spower<3, double>(a);
+  e_squared += p.omega_k / spower<2, double>(a);
+  e_squared += p.omega_q * exp(3 * de_exponent);
+
+  return p.h * 100 * sqrt(e_squared);
+}
+
+/**
+ * Store the parameters, check that the density parameters are normalised
+ * and compute the growth-factor normalisation at a = 1.
+ *
+ * The normalisation check uses a small absolute tolerance: the four density
+ * parameters are ordinary doubles and their sum generally differs from 1 by
+ * rounding error even when the inputs are consistent.
+ *
+ * @throws ErrorBadState (through error_helper) if
+ *         omega_r + omega_m + omega_k + omega_q is not 1 within tolerance.
+ */
+Cosmology::Cosmology(const CosmologicalParameters &p) : parameters(p) {
+  // Do a check if the cosmological parameters sum up to 1
+  Console::instance().print<LOG_DEBUG>(
+      "Checking the normalization of cosmological parameters");
+
+  // Far below any physically meaningful difference, far above the rounding
+  // error of a four-term sum of doubles of order unity.
+  const double sum_tolerance = 1e-10;
+  const double sum = p.omega_r + p.omega_m + p.omega_k + p.omega_q;
+
+  // Written so that a NaN sum is rejected as well.
+  if (!(std::abs(sum - 1.0) <= sum_tolerance)) {
+    error_helper<ErrorBadState>("omega_r + omega_m + omega_k + omega_q != 1");
+  }
+  norm_d_plus = aux_d_plus(1.0);
+}
+
+// Expansion rate at scale factor a for the stored parameters; forwards to
+// the file-local hubble() helper.
+double Cosmology::Hubble(double a) const { return hubble(a, parameters); }
+
+/**
+ * Tabulate the growth factor D(a) and its derivative dD/da on a regular grid
+ * in log(a), and install the interpolators used by aux_d_plus().
+ *
+ * The growth ODE is integrated once from AMIN, stopping at every grid node
+ * to store log(D) and log(dD/da). norm_d_plus is then refreshed from the
+ * table at a = 1.
+ *
+ * The GSL solver objects and the tables are owned by smart pointers so that
+ * nothing leaks when the integration fails.
+ *
+ * @throws ErrorBadState (through error_helper) if the ODE integration fails.
+ */
+void Cosmology::precompute_d_plus() {
+  LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
+  typedef boost::multi_array<double, 1> Table;
+
+  std::unique_ptr<gsl_odeiv_step, decltype(&gsl_odeiv_step_free)> s(
+      gsl_odeiv_step_alloc(gsl_odeiv_step_bsimp, 2), &gsl_odeiv_step_free);
+  std::unique_ptr<gsl_odeiv_control, decltype(&gsl_odeiv_control_free)> c(
+      gsl_odeiv_control_y_new(0.0, epsrel), &gsl_odeiv_control_free);
+  std::unique_ptr<gsl_odeiv_evolve, decltype(&gsl_odeiv_evolve_free)> e(
+      gsl_odeiv_evolve_alloc(2), &gsl_odeiv_evolve_free);
+
+  double t = AMIN,
+         habs = 1e-4; /* t = initial scale factor, habs = initial step size */
+  // TODO: Improve the initial condition
+  // If matter dominated era used to anchor the solution
+  // D(a) \propto a for a->0
+  // Thus D'(a) = D(a)/a
+  double y[2] = {
+      1.0, 1.0 / AMIN}; /* initial conditions, D(AMIN) = 1, D'(AMIN) = 1/AMIN */
+  const double AMAX = 1.0;
+
+  /* result from solution of a 2nd order differential equation, transformed to a system of 2 1st order deqs */
+  gsl_odeiv_system sys = {
+      d_plus_function, d_plus_jacobian, 2, (void *)&parameters};
+
+  unsigned int NUM_D = 10000;
+
+  const double log_a_min = std::log(AMIN);
+  const double log_a_max = std::log(AMAX);
+  const double delta_log_a = (log_a_max - log_a_min) / (NUM_D - 1);
+  // One node more than NUM_D: the last node lies one step beyond a = AMAX.
+  std::unique_ptr<Table> D_data(new Table(boost::extents[NUM_D + 1]));
+  std::unique_ptr<Table> Dprime_data(new Table(boost::extents[NUM_D + 1]));
+
+  auto get_a = [&](unsigned int k) {
+    return std::exp(log_a_min + k * delta_log_a);
+  };
+
+  (*D_data)[0] = std::log(y[0]);
+  (*Dprime_data)[0] = std::log(y[1]);
+
+  for (unsigned int j = 1; j <= NUM_D; j++) {
+    const double a_current = get_a(j);
+    while (t < a_current) {
+      int status = gsl_odeiv_evolve_apply(
+          e.get(), c.get(), s.get(), &sys, &t, a_current, &habs, y);
+      if (status != GSL_SUCCESS) {
+        error_helper<ErrorBadState>("Error during ODE integration of Dplus");
+      }
+    }
+
+    (*D_data)[j] = std::log(y[0]);
+    (*Dprime_data)[j] = std::log(y[1]);
+  }
+
+  // The solver is no longer needed; release it before building the tables.
+  e.reset();
+  c.reset();
+  s.reset();
+
+  // The table pointers are released to the interpolators here, as the
+  // original raw pointers were.
+  pre_dplus = std::make_shared<auto_interpolator<double>>(
+      log_a_min, log_a_max, delta_log_a, 0,
+      std::numeric_limits<double>::infinity(), D_data.release());
+  pre_dplus->setThrowOnOverflow();
+
+  pre_dplus_prime = std::make_shared<auto_interpolator<double>>(
+      log_a_min, log_a_max, delta_log_a, 0,
+      std::numeric_limits<double>::infinity(), Dprime_data.release());
+  pre_dplus_prime->setThrowOnOverflow();
+
+  norm_d_plus = std::exp((*pre_dplus)(std::log(1.0)));
+}
+
+/**
+ * Unnormalised growth factor D(a), and optionally its derivative dD/da.
+ *
+ * When the tables built by precompute_d_plus() are available they are used;
+ * otherwise the growth ODE is integrated from AMIN up to `a` on the spot.
+ *
+ * @param a                   scale factor.
+ * @param result_d_plus_prime if non-null, receives dD/da at `a`.
+ * @return D(a) with the normalisation D(AMIN) = 1.
+ * @throws ErrorBadState (through error_helper) if the ODE integration fails,
+ *         consistently with precompute_d_plus(), instead of silently
+ *         returning a partially integrated value.
+ */
+double Cosmology::aux_d_plus(double a, double *result_d_plus_prime) const {
+
+  if (pre_dplus && pre_dplus_prime) {
+    double result = std::exp((*pre_dplus)(std::log(a)));
+    if (result_d_plus_prime != 0)
+      *result_d_plus_prime = std::exp((*pre_dplus_prime)(std::log(a)));
+    return result;
+  }
+
+  // Solver objects are owned by smart pointers so that they are released on
+  // the error path as well.
+  std::unique_ptr<gsl_odeiv_step, decltype(&gsl_odeiv_step_free)> s(
+      gsl_odeiv_step_alloc(gsl_odeiv_step_bsimp, 2), &gsl_odeiv_step_free);
+  std::unique_ptr<gsl_odeiv_control, decltype(&gsl_odeiv_control_free)> c(
+      gsl_odeiv_control_y_new(0.0, epsrel), &gsl_odeiv_control_free);
+  std::unique_ptr<gsl_odeiv_evolve, decltype(&gsl_odeiv_evolve_free)> e(
+      gsl_odeiv_evolve_alloc(2), &gsl_odeiv_evolve_free);
+
+  double t = AMIN,
+         habs = 1e-4; /* t = initial scale factor, habs = initial step size */
+  double y[2] = {
+      1.0, 1.0 / AMIN}; /* initial conditions, D(AMIN) = 1, D'(AMIN) = 1/AMIN */
+
+  /* result from solution of a 2nd order differential equation, transformed to a system of 2 1st order deqs */
+  gsl_odeiv_system sys = {
+      d_plus_function, d_plus_jacobian, 2, (void *)&parameters};
+
+  while (t < a) {
+    int status =
+        gsl_odeiv_evolve_apply(e.get(), c.get(), s.get(), &sys, &t, a, &habs, y);
+    if (status != GSL_SUCCESS) {
+      error_helper<ErrorBadState>("Error during ODE integration of Dplus");
+    }
+  }
+
+  if (result_d_plus_prime)
+    *result_d_plus_prime = y[1]; /* d(d_plus)/da */
+
+  return y[0]; /* d_plus */
+}
+
+/* --- function dcom_dz [speed of light over the expansion rate at redshift z,
+       divided by one megaparsec in metres] --- */
+double Cosmology::dcom_dz(double z) const {
+  const double scale_factor = 1. / (z + 1.);
+
+  return cosmo_clight / Hubble(scale_factor) / cosmo_mparsec;
+}
+
+/* --- function aux_dcom [integrand of a2com: -c / (a^2 H(a)), with c in km/s;
+       params points to a CosmologicalParameters] --- */
+double aux_dcom(double a, void *params) {
+  const CosmologicalParameters &cosmo =
+      *static_cast<const CosmologicalParameters *>(params);
+
+  const double clight_km_s = cosmo_clight / 1000.; ///km/s
+  const double minus_inv_a2_H = -1. / square(a) / hubble(a, cosmo);
+
+  return clight_km_s * minus_inv_a2_H;
+}
+
+/* --- function a2com [integral of aux_dcom from scale factor 1 to a,
+       evaluated with the GSL adaptive 61-point Gauss-Kronrod rule] --- */
+double Cosmology::a2com(double a) const {
+  gsl_function integrand;
+  integrand.function = &aux_dcom;
+  integrand.params = (void *)&parameters;
+
+  gsl_integration_workspace *workspace =
+      gsl_integration_workspace_alloc(NEVAL);
+
+  double distance, abs_error;
+  gsl_integration_qag(
+      &integrand, 1.0, a, epsabs, epsrel, NEVAL, GSL_INTEG_GAUSS61, workspace,
+      &distance, &abs_error);
+
+  gsl_integration_workspace_free(workspace);
+
+  return distance;
+}
+
+// Build the table used by com2a(): log(a) tabulated on a regular grid in
+// log(comoving distance). Does nothing if the table already exists.
+//
+// Step 1 tabulates log(a2com(a)) on a regular grid in log(a) between 1e-4
+// and (just below) 1; step 2 inverts that relation by linear interpolation
+// onto a regular grid in log(distance).
+void Cosmology::precompute_com2a() {
+  LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
+  if (pre_com2a)
+    return;
+
+  const unsigned int NUM_A = 10000; // TODO: benchmark precision
+  const double log_a_min = std::log(1e-4);
+  const double delta_log_a = std::log(1.0 / 1e-4) / NUM_A;
+
+  boost::multi_array<double, 1> log_d(boost::extents[NUM_A]);
+
+  // Each entry is an independent integral, hence the parallel loop.
+#pragma omp parallel for
+  for (unsigned int i = 0; i < NUM_A; i++) {
+    const double a = std::exp(delta_log_a * i + log_a_min);
+
+    log_d[i] = std::log(a2com(a));
+  }
+
+  // The code treats log_d as decreasing with the index: the last entry
+  // (largest a) is taken as the smallest distance.
+  const double log_d_min = log_d[NUM_A - 1];
+  const double log_d_max = log_d[0];
+  const double delta_log_d = (log_d_max - log_d_min) / NUM_A;
+
+  // Ownership of this table is handed to the interpolator below.
+  auto log_a = new boost::multi_array<double, 1>(boost::extents[NUM_A]);
+  double current_log_d = log_d_min;
+  (*log_a)[0] = delta_log_a * (NUM_A - 1) + log_a_min;
+  // j walks down log_d so that log_d[j] >= current_log_d > log_d[j + 1].
+  unsigned int j = NUM_A - 1;
+  for (unsigned int i = 1; i < NUM_A; i++) {
+    current_log_d += delta_log_d;
+    while (current_log_d > log_d[j]) {
+      if (j == 0) {
+        ctx.print2<LOG_ERROR>("Bad reinterpolation state.");
+        MPI_Communication::instance()->abort();
+      }
+      j--;
+    }
+    Console::instance().c_assert(
+        j < NUM_A - 1, "Invalid state of the reinterpolation");
+    // w is the linear weight of node j + 1 within the bracket [j, j + 1].
+    const double w = (current_log_d - log_d[j]) / (log_d[j + 1] - log_d[j]);
+    (*log_a)[i] = log_a_min + delta_log_a * ((1 - w) * j + (j + 1) * w);
+  }
+
+  pre_com2a = std::make_shared<auto_interpolator<double>>(
+      log_d_min, log_d_max, delta_log_d, 0,
+      std::numeric_limits<double>::infinity(), log_a);
+  pre_com2a->setThrowOnOverflow();
+}
+
+/* --- function com2a [inverse of a2com: scale factor for a given comoving
+       distance; uses the precomputed table when available, a bisection on
+       a2com otherwise] --- */
+double Cosmology::com2a(double com) const {
+  if (!pre_com2a) {
+    auto distance_of_a = [this](double a) { return a2com(a); };
+    return bisection(A_MIN, A_MAX, 1e-6, com, distance_of_a);
+  }
+
+  const double log_a = (*pre_com2a)(std::log(com));
+  return std::exp(log_a);
+}
+
+// Scale factor for a distance r given in the units accepted by comph2com().
+double Cosmology::comph2a(double r) const {
+  const double com = comph2com(r);
+  return com2a(com);
+}
+
+// d_plus evaluated at the scale factor corresponding to distance r
+// (r in the units accepted by comph2com()).
+double Cosmology::comph2d_plus(double r) const {
+  const double scale_factor = com2a(comph2com(r));
+  return d_plus(scale_factor);
+}
+
+// g_plus evaluated at the scale factor corresponding to distance r
+// (r in the units accepted by comph2com()).
+double Cosmology::comph2g_plus(double r) const {
+  const double scale_factor = com2a(comph2com(r));
+  return g_plus(scale_factor);
+}
+
+// Expansion rate at the scale factor corresponding to distance r
+// (r in the units accepted by comph2com()).
+double Cosmology::comph2Hubble(double r) const {
+  const double scale_factor = com2a(comph2com(r));
+  return Hubble(scale_factor);
+}
+
+/* --- function aux_dtr [integrand of dtr: (h * 100 / H(a)) / a^3;
+       params points to a CosmologicalParameters] --- */
+double aux_dtr(double a, void *params) {
+  const CosmologicalParameters &cosmo =
+      *static_cast<const CosmologicalParameters *>(params);
+
+  ///Fhubble=H0/adot
+  const double H0 = 100.; ///km/s/Mpc
+
+  return cosmo.h * H0 / hubble(a, cosmo) / (a * a * a);
+}
+/* --- function pm_time-stepping dtr [integral of aux_dtr between the scale
+       factors ai and af] --- */
+double Cosmology::dtr(double ai, double af) {
+  gsl_function integrand;
+  integrand.function = &aux_dtr;
+  integrand.params = (void *)&parameters;
+
+  gsl_integration_workspace *workspace =
+      gsl_integration_workspace_alloc(NEVAL);
+
+  double integral, abs_error;
+  gsl_integration_qag(
+      &integrand, ai, af, epsabs, epsrel, NEVAL, GSL_INTEG_GAUSS61, workspace,
+      &integral, &abs_error);
+
+  gsl_integration_workspace_free(workspace);
+
+  return integral;
+}
+/* --- end of function dtr --- */
+/* --- function aux_dtv [integrand of dtv: (h * 100 / H(a)) / a^2;
+       params points to a CosmologicalParameters] --- */
+double aux_dtv(double a, void *params) {
+  const CosmologicalParameters &cosmo =
+      *static_cast<const CosmologicalParameters *>(params);
+
+  ///Fhubble=H0/adot
+  const double H0 = 100.; ///km/s/Mpc
+
+  return cosmo.h * H0 / hubble(a, cosmo) / a / a;
+}
+/* --- function pm_time-stepping dtv [integral of aux_dtv between the scale
+       factors ai and af] --- */
+double Cosmology::dtv(double ai, double af) {
+  gsl_function integrand;
+  integrand.function = &aux_dtv;
+  integrand.params = (void *)&parameters;
+
+  gsl_integration_workspace *workspace =
+      gsl_integration_workspace_alloc(NEVAL);
+
+  double integral, abs_error;
+  gsl_integration_qag(
+      &integrand, ai, af, epsabs, epsrel, NEVAL, GSL_INTEG_GAUSS61, workspace,
+      &integral, &abs_error);
+
+  gsl_integration_workspace_free(workspace);
+
+  return integral;
+}
+/* --- end of function dtv --- */
+
+/* --- COLA time stepping --- */
+// Integral of aux_dtv(a) * d_plus(a) between the scale factors ai and af.
+double Cosmology::integral_d_plus(double ai, double af) {
+  auto weighted_growth = [this](double a) -> double {
+    return aux_dtv(a, &parameters) * d_plus(a);
+  };
+
+  return gslIntegrate(weighted_growth, ai, af, epsrel, NEVAL);
+}
+/* --- end --- */
+
+/* --- function critical density --- */
+// Critical density for the stored value of h, in units [Msun/(Mpc h^-1)^3].
+double Cosmology::rho_crit() {
+  const double h = parameters.h;
+
+  // Critical density in units [Msun/Mpc^3].
+  const double rho_c = 3. * pow(h * cosmo_H100_s, 2.) /
+                       (8. * M_PI * cosmo_G_const_Mpc_Msun_s);
+
+  // Convert the volume to (Mpc h^-1)^3.
+  return rho_c / h / h / h;
+}
+/* --- end of function critical density --- */
+/* --- function mass of volume --- */
+// Mean mass, in units [Msun], contained in a volume V at the mean matter
+// density (critical density times omega_m).
+double Cosmology::mass_of_volume(double V) {
+  const double mean_matter_density = rho_crit() * parameters.omega_m;
+  return mean_matter_density * V;
+}
+/* --- end of function mass of volume --- */
--- a/libLSS/physics/cosmo.hpp
+++ b/libLSS/physics/cosmo.hpp
@ -0,0 +1,196 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/cosmo.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_COSMO_HPP
+#define __LIBLSS_COSMO_HPP
+
+#include <string>
+#include <functional>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/bisection.hpp"
+#include "libLSS/tools/auto_interpolator.hpp"
+
+namespace LibLSS {
+
+  /**
+   * Plain aggregate of the cosmological parameters used throughout libLSS.
+   *
+   * Every member is zero-initialised by the default constructor.
+   */
+  struct CosmologicalParameters {
+
+    double omega_r; /* negligible radiation density */
+    double omega_k; /* curvature - flat prior for everything! */
+    double omega_m;
+    double omega_b;
+    double omega_q;
+    double w;
+    double n_s;
+    double fnl; /* non-linearity parameter, for primordial non-Gaussianity */
+    double wprime;
+    double sigma8;
+    double rsmooth;
+    double h;
+    double beta;
+    double z0;
+    double a0;      /* scale factor at epoch of observation usually 1*/
+    double sum_mnu; /* sum of neutrino masses */
+
+    // The initialiser list follows the declaration order and covers every
+    // member; rsmooth used to be missing and was left indeterminate.
+    CosmologicalParameters()
+        : omega_r(0), omega_k(0), omega_m(0), omega_b(0), omega_q(0), w(0),
+          n_s(0), fnl(0), wprime(0), sigma8(0), rsmooth(0), h(0), beta(0),
+          z0(0), a0(0), sum_mnu(0) {}
+
+    /**
+     * Exact (bitwise-value) comparison of omega_r, omega_k, omega_m, omega_b,
+     * omega_q, w, n_s, wprime, sigma8, h and sum_mnu.
+     *
+     * fnl, rsmooth, beta, z0 and a0 do not take part in the comparison.
+     * NOTE(review): confirm that leaving those fields out is intended.
+     */
+    bool operator==(CosmologicalParameters const &p2) const {
+      return omega_r == p2.omega_r && omega_k == p2.omega_k &&
+             omega_m == p2.omega_m && omega_b == p2.omega_b &&
+             omega_q == p2.omega_q && w == p2.w && n_s == p2.n_s &&
+             wprime == p2.wprime && sigma8 == p2.sigma8 && h == p2.h &&
+             sum_mnu == p2.sum_mnu;
+    }
+
+    /** Negation of operator==. */
+    bool operator!=(CosmologicalParameters const &p2) const {
+      return !operator==(p2);
+    }
+  };
+
+  // A_MIN / A_MAX: bracket handed to the bisection-based inverse functions
+  // that Cosmology declares through LIBLSS_COSMOLOGY_INVERSE_FUNCTION.
+  static const double A_MIN = 0.;
+  static const double A_MAX = 30000.;
+  // Threshold on the scale factor below which Cosmology::g_plus returns 1
+  // instead of evaluating a / D * D'.
+  static const double COSMO_EPS = 1e-6;
+
+  // Declares, inside class Cosmology, a const member function
+  //   double TARGET(double X) const
+  // that calls bisection() with the bracket [RANGE_MIN, RANGE_MAX], the
+  // literal 1e-6, the requested value X and a functor bound to
+  // Cosmology::ORIGINAL on the current object.
+  // NOTE(review): presumably bisection() returns the argument at which
+  // ORIGINAL equals X and 1e-6 is its tolerance - confirm in
+  // libLSS/tools/bisection.hpp.
+#define LIBLSS_COSMOLOGY_INVERSE_FUNCTION(                                     \
+    TARGET, ORIGINAL, RANGE_MIN, RANGE_MAX)                                    \
+  double TARGET(double X) const {                                              \
+    return bisection(                                                          \
+        RANGE_MIN, RANGE_MAX, 1e-6, X,                                         \
+        std::bind(&Cosmology::ORIGINAL, this, std::placeholders::_1));         \
+  }
+
+  /**
+   * Cosmology calculator built around one CosmologicalParameters instance.
+   *
+   * The inline members below are simple conversions between scale factor,
+   * redshift and distances; the remaining members are only declared here and
+   * are implemented in the corresponding source file.
+   */
+  class Cosmology {
+  private:
+    CosmologicalParameters parameters;
+    double A_spec; /* Normalization of the power spectrum */
+    int spec_type; /* indicates which power spectrum is currently used, and whether the normalization has to be reevaluated*/
+    // Divisor applied by d_plus() to the raw value returned by aux_d_plus().
+    double norm_d_plus;
+    // Un-normalised growth factor; optionally also writes a second quantity
+    // through result_d_plus_prime (used as the derivative in g_plus()).
+    double aux_d_plus(double a, double *result_d_plus_prime = 0) const;
+
+    // NOTE(review): presumably interpolation tables filled by the
+    // precompute_* members below - confirm in the implementation.
+    std::shared_ptr<auto_interpolator<double>> pre_com2a, pre_dplus,
+        pre_dplus_prime;
+
+  public:
+    Cosmology(const CosmologicalParameters &parameters);
+
+    void precompute_com2a();
+    void precompute_d_plus();
+
+    // Read-only access to the stored parameter set.
+    CosmologicalParameters const &getParameters() const { return parameters; }
+
+    // Scale factor -> redshift: z = 1/a - 1.
+    double a2z(double a) const { return 1 / a - 1; }
+    // Redshift -> scale factor: a = 1/(1+z).
+    double z2a(double z) const { return 1 / (1 + z); }
+    // Growth factor: aux_d_plus(a) divided by norm_d_plus.
+    double d_plus(double a) const { return aux_d_plus(a) / norm_d_plus; }
+
+    // Multiplies the distance d by (1 + z).
+    double d2dlum(double z, double d) const { return (1 + z) * d; }
+
+    // Inverse of d2dlum: divides dlum by (1 + z).
+    double dlum2d(double z, double dlum) const { return dlum / (1 + z); }
+
+    // Returns a / D * D' for a > COSMO_EPS and 1 otherwise, where D and D'
+    // are the two outputs of aux_d_plus(a).
+    double g_plus(double a) const {
+      double d_plus, d_plus_prime;
+
+      d_plus = aux_d_plus(a, &d_plus_prime);
+      return (a > COSMO_EPS) ? (a / d_plus * d_plus_prime) : 1.0;
+    }
+
+    double a2com(double a) const;
+    double com2a(double com) const;
+
+    // Redshift -> a2com(z2a(z)).
+    double z2com(double z) const {
+      double a = z2a(z);
+      double dcom = a2com(a);
+      return dcom;
+    };
+
+    double dcom_dz(double z) const;
+
+    // Multiplies, respectively divides, a distance by parameters.h.
+    double com2comph(double r) const { return parameters.h * r; }
+    double comph2com(double r) const { return r / parameters.h; }
+
+    double comph2d_plus(double r) const;
+    double comph2g_plus(double r) const;
+    double comph2Hubble(double r) const;
+    double comph2a(double r) const;
+
+    // Scale factor -> (1 + z) * a2com(a).
+    double a2dlum(double a) const {
+      double z = a2z(a);
+      double dcom = a2com(a);
+      return (1 + z) * dcom;
+    };
+
+    // Redshift -> (1 + z) * a2com(z2a(z)).
+    double z2dlum(double z) const {
+      double a = z2a(z);
+      double dcom = a2com(a);
+      return (1 + z) * dcom;
+    };
+
+    // dlum2a(X): bisection on a2dlum over [A_MIN, A_MAX].
+    LIBLSS_COSMOLOGY_INVERSE_FUNCTION(dlum2a, a2dlum, A_MIN, A_MAX);
+
+    // Scale factor -> a2com(a) / (1 + z).
+    double a2dA(double a) const {
+      double z = a2z(a);
+      double dcom = a2com(a);
+      return dcom / (1 + z);
+    };
+
+    // Redshift -> a2com(z2a(z)) / (1 + z).
+    double z2dA(double z) const {
+      double a = z2a(z);
+      double dcom = a2com(a);
+      return dcom / (1 + z);
+    };
+
+    // a2dA is not invertible over the full redshift range, hence the
+    // bisection bracket starts at 0.5 instead of A_MIN.
+    LIBLSS_COSMOLOGY_INVERSE_FUNCTION(dA2a, a2dA, 0.5, A_MAX);
+
+    double Hubble(double a) const;
+    // Returns the stored parameters.h.
+    double hNow() const { return parameters.h; }
+    double k_J(double a);
+    double kF_baryon(double a);
+    double kSZ_kernel(double a);
+    void print_cdmspec2file(std::string outputFileName);
+    double power_spectrum(double k, int type);
+    double transfer_function(double k);
+    double power_spectrum_grav(double k, int type);
+    double rho_background_matter(double a);
+    double gravpot_norm();
+    double return_cosmo_par(std::string cosmopar);
+    // Returns parameters.h * 100 / (a * Hubble(a)).
+    double FHubble(double a) { return (parameters.h * 100 / (a * Hubble(a))); }
+    double dtr(double ai, double af);
+    double dtv(double ai, double af);
+    double integral_d_plus(double ai, double af);
+    double rho_crit();
+    double mass_of_volume(double V);
+  };
+
+#undef LIBLSS_COSMOLOGY_INVERSE_FUNCTION
+
+} // namespace LibLSS
+
+// Describes LibLSS::CosmologicalParameters field by field through the
+// CTOOL_STRUCT_TYPE macro, producing the type HDF5T_CosmologicalParameters.
+// NOTE(review): presumably CTOOL_STRUCT_TYPE comes from
+// CosmoTool/hdf5_array.hpp and defines the HDF5 compound type - confirm.
+// NOTE(review): the struct member sum_mnu is not listed below; confirm
+// whether it is deliberately left out of the serialised layout.
+// clang-format off
+CTOOL_STRUCT_TYPE(LibLSS::CosmologicalParameters,
+                  HDF5T_CosmologicalParameters,
+    ((double, omega_r))
+    ((double, omega_k))
+    ((double, omega_m))
+    ((double, omega_b))
+    ((double, omega_q))
+    ((double, w))
+    ((double, n_s))
+    ((double, fnl))
+    ((double, wprime))
+    ((double, sigma8))
+    ((double, rsmooth))
+    ((double, h))
+    ((double, beta))
+    ((double, z0))
+    ((double, a0))
+);
+// clang-format on
+
+#endif
--- a/libLSS/physics/cosmo_power.hpp
+++ b/libLSS/physics/cosmo_power.hpp
@ -0,0 +1,61 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/cosmo_power.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __ARES_COSMO_POWER_HPP
+#define __ARES_COSMO_POWER_HPP
+
+#include <CosmoTool/algo.hpp>
+#include <CosmoTool/cosmopower.hpp>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/log_traits.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include <boost/format.hpp>
+#include "libLSS/physics/cosmo.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+
+namespace LibLSS {
+
+  /**
+   * Fill the "powerspectrum" array held by `state` from a CosmoTool power
+   * spectrum (HU_WIGGLES function) evaluated on the "k_modes" array.
+   *
+   * For each mode i the stored value is
+   *   cpower.power(k[i] * h) * h * h * h * adjust * exp(-(k[i] * Rsmooth)^2)
+   * where Rsmooth is currently fixed to 0, so the exponential equals 1.
+   *
+   * @param state         provides the 1d arrays "k_modes" (read) and
+   *                      "powerspectrum" (overwritten)
+   * @param cosmo_params  h, omega_b, omega_m, sigma8 and n_s are read; the
+   *                      object is not modified by this function
+   * @param adjust        multiplicative factor applied to every value
+   */
+  inline void createCosmologicalPowerSpectrum(
+      MarkovState &state, CosmologicalParameters &cosmo_params,
+      double adjust = 1) {
+    using CosmoTool::square;
+    ConsoleContext<LOG_INFO_SINGLE> ctx("filling cosmological power spectrum");
+
+    CosmoTool::CosmoPower cpower;
+
+    double const Rsmooth = 0; // 1.6;
+    double const h = cpower.h = cosmo_params.h;
+    cpower.OMEGA_B = cosmo_params.omega_b;
+    // Cold dark matter density is total matter minus baryons.
+    cpower.OMEGA_C = cosmo_params.omega_m - cosmo_params.omega_b;
+    cpower.SIGMA8 = cosmo_params.sigma8;
+    cpower.n = cosmo_params.n_s;
+    ctx.print(
+        boost::format(
+            "sigma8 = %g, OmegaB = %g, Omega_C = %g, Omega_M = %g, h = %g") %
+        cpower.SIGMA8 % cpower.OMEGA_B % cpower.OMEGA_C % cosmo_params.omega_m %
+        h);
+    cpower.updateCosmology();
+    cpower.setFunction(CosmoTool::CosmoPower::HU_WIGGLES);
+    cpower.normalize();
+
+    ArrayType1d::ArrayType &k = *state.get<ArrayType1d>("k_modes")->array;
+    ArrayType1d::ArrayType &Pk =
+        *state.get<ArrayType1d>("powerspectrum")->array;
+
+    // num_elements() is unsigned: use an unsigned index to avoid a
+    // signed/unsigned comparison, and evaluate the bound only once.
+    size_t const numModes = k.num_elements();
+    for (size_t i = 0; i < numModes; i++) {
+      Pk[i] = cpower.power(k[i] * h) * h * h * h * adjust *
+              std::exp(-square(k[i] * Rsmooth));
+    }
+
+    // Notify that the power spectrum is ready.
+//    state.get<ArrayType1d>("powerspectrum")->deferInit.submit_ready();
+  }
+} // namespace LibLSS
+
+#endif
--- a/libLSS/physics/generic_cic.hpp
+++ b/libLSS/physics/generic_cic.hpp
@ -0,0 +1,228 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/generic_cic.hpp
+    Copyright (C) 2009-2019 Jens Jasche <jens.jasche@fysik.su.se>
+    Copyright (C) 2014-2019 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2019 Florent Leclercq <florent.leclercq@polytechnique.org>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_GENERIC_CIC_HPP
+#define __LIBLSS_GENERIC_CIC_HPP
+
+#include <boost/config.hpp>
+
+namespace LibLSS {
+
+    // Small functors shared by the mass-assignment kernels: boundary handling
+    // for grid indices and default per-particle weights.
+    namespace CIC_Tools {
+
+        // Boundary functor that leaves all three indices untouched.
+        struct NonPeriodic {
+            NonPeriodic(int, int, int ) {}
+
+            template<typename I>
+            void operator()(I&, I&, I&) const {}
+        };
+
+        // Boundary functor that folds every index which is >= the grid
+        // extent back with a modulo. Indices below the extent are left as
+        // they are.
+        // NOTE(review): negative values of a signed index type are not
+        // wrapped - confirm callers never produce them.
+        struct Periodic {
+            int N0, N1, N2;
+
+            Periodic(int n0, int n1, int n2) :
+                N0(n0), N1(n1), N2(n2) {}
+
+            template<typename I>
+            void operator()(I& i, I& j, I& k) const {
+                fold(i, N0);
+                fold(j, N1);
+                fold(k, N2);
+            }
+
+        private:
+            template<typename I>
+            static void fold(I& index, int extent) {
+                if (index >= extent) index %= extent;
+            }
+        };
+
+        // Same as Periodic for the second and third index; the first index
+        // is never modified. The communicator argument is accepted but not
+        // used.
+        struct Periodic_MPI {
+            size_t N0, N1, N2;
+
+            Periodic_MPI(size_t n0, size_t n1, size_t n2, MPI_Communication *) :
+                N0(n0), N1(n1), N2(n2) {}
+
+            template<typename I>
+            void operator()(I&, I& j, I& k) const {
+                fold(j, N1);
+                fold(k, N2);
+            }
+
+        private:
+            template<typename I>
+            static void fold(I& index, size_t extent) {
+                if (index >= extent) index %= extent;
+            }
+        };
+
+        // One-dimensional weight accessor returning 1 for every particle.
+        struct DefaultWeight  {
+            BOOST_STATIC_CONSTANT(size_t, dimensionality = 1);
+            double operator[](long) const { return 1; }
+        };
+
+        // Two-dimensional weight accessor: indexing once yields a
+        // DefaultWeight, indexing twice yields 1.
+        struct DefaultWeightDim2  {
+            BOOST_STATIC_CONSTANT(size_t, dimensionality = 2);
+            DefaultWeight operator[](long) const { return DefaultWeight(); }
+        };
+    }
+
+
+    /**
+     * Static facade that forwards projection, interpolation and adjoint
+     * requests to the kernel `ImplType`, supplying defaults for the arguments
+     * a caller leaves out:
+     *   - number of particles: particles.shape()[0]
+     *   - weights: CIC_Tools::DefaultWeight (CIC_Tools::DefaultWeightDim2 for
+     *     the vector `interpolation` overloads)
+     *   - boundary functor: CIC_Tools::Periodic(N0, N1, N2)
+     *
+     * Common arguments: Lx, Ly, Lz and N0, N1, N2 are passed through to the
+     * kernel unchanged, as is the boundary functor `p`.
+     *
+     * The overloads that take neither a weight nor (optionally) a boundary
+     * functor keep their historical template parameter lists so that calls
+     * with explicit template arguments still compile. The parameters that
+     * cannot be deduced now carry default template arguments, which makes
+     * those overloads callable with ordinary argument deduction.
+     */
+    template<typename T,typename ImplType>
+    class GenericCIC {
+    public:
+        typedef ImplType impl;
+
+        // ---------------------------------------------------------------
+        // projection: particles -> density
+        // ---------------------------------------------------------------
+
+        // Full form: explicit boundary functor, weights and particle count.
+        template<typename ParticleArray, typename ProjectionDensityArray, typename WeightArray,
+                 typename PeriodicFunction>
+        static void projection(const ParticleArray& particles, ProjectionDensityArray& density,
+                               T Lx, T Ly, T Lz,
+                               int N0, int N1, int N2,
+                               const PeriodicFunction& p, const WeightArray& weight, size_t Np) {
+            impl::projection(particles, density, Lx, Ly, Lz, N0, N1, N2, p, weight, Np);
+        }
+
+        // Particle count defaults to particles.shape()[0].
+        template<typename ParticleArray, typename ProjectionDensityArray, typename WeightArray,
+                 typename PeriodicFunction>
+        static void projection(const ParticleArray& particles, ProjectionDensityArray& density,
+                               T Lx, T Ly, T Lz,
+                               int N0, int N1, int N2,
+                               const PeriodicFunction& p, const WeightArray& weight) {
+            impl::projection(particles, density, Lx, Ly, Lz, N0, N1, N2, p, weight, particles.shape()[0]);
+        }
+
+        // Unit weights, all particles.
+        template<typename ParticleArray, typename ProjectionDensityArray, typename PeriodicFunction>
+        static void projection(const ParticleArray& particles, ProjectionDensityArray& density,
+                               T Lx, T Ly, T Lz,
+                               int N0, int N1, int N2,
+                               const PeriodicFunction& p) {
+            impl::projection(particles, density, Lx, Ly, Lz, N0, N1, N2, p,
+                             CIC_Tools::DefaultWeight(), particles.shape()[0]);
+        }
+
+        // Periodic boundaries, unit weights, all particles.
+        template<typename ParticleArray, typename ProjectionDensityArray>
+        static void projection(const ParticleArray& particles, ProjectionDensityArray& density,
+                               T Lx, T Ly, T Lz,
+                               int N0, int N1, int N2) {
+            impl::projection(particles, density, Lx, Ly, Lz, N0, N1, N2, CIC_Tools::Periodic(N0, N1, N2),
+                             CIC_Tools::DefaultWeight(), particles.shape()[0]);
+        }
+
+        // ---------------------------------------------------------------
+        // interpolation_scalar: field -> one value per particle (A)
+        // ---------------------------------------------------------------
+
+        // Full form.
+        template<typename ParticleBasedScalar, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void interpolation_scalar(ParticleBasedScalar &A, const ParticleArray &particles,
+                                         const ProjectionDensityArray &field,
+                                         T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                         const PeriodicFunction &p,
+                                         const WeightArray &weight, size_t Np) {
+            impl::interpolation_scalar(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, Np);
+        }
+
+        // Particle count defaults to particles.shape()[0].
+        template<typename ParticleBasedScalar, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void interpolation_scalar(ParticleBasedScalar &A, const ParticleArray &particles,
+                                         const ProjectionDensityArray &field,
+                                         T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                         const PeriodicFunction &p,
+                                         const WeightArray &weight) {
+            impl::interpolation_scalar(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, particles.shape()[0]);
+        }
+
+        // Unit weights, all particles. WeightArray is not used by this
+        // overload; it is defaulted because it cannot be deduced.
+        template<typename ParticleBasedScalar, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray = CIC_Tools::DefaultWeight,
+                 typename PeriodicFunction = CIC_Tools::Periodic>
+        static void interpolation_scalar(ParticleBasedScalar &A, const ParticleArray &particles,
+                                         const ProjectionDensityArray &field,
+                                         T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                         const PeriodicFunction &p) {
+            impl::interpolation_scalar(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p,
+                                       CIC_Tools::DefaultWeight(), particles.shape()[0]);
+        }
+
+        // Periodic boundaries, unit weights, all particles. Neither
+        // WeightArray nor PeriodicFunction is used; both are defaulted
+        // because they cannot be deduced.
+        template<typename ParticleBasedScalar, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray = CIC_Tools::DefaultWeight,
+                 typename PeriodicFunction = CIC_Tools::Periodic>
+        static void interpolation_scalar(ParticleBasedScalar &A, const ParticleArray &particles,
+                                         const ProjectionDensityArray &field,
+                                         T Lx, T Ly, T Lz, int N0, int N1, int N2) {
+            impl::interpolation_scalar(A, particles, field, Lx, Ly, Lz, N0, N1, N2,
+                                       CIC_Tools::Periodic(N0, N1, N2),
+                                       CIC_Tools::DefaultWeight(), particles.shape()[0]);
+        }
+
+        // ---------------------------------------------------------------
+        // interpolation: field -> array per particle (A)
+        // ---------------------------------------------------------------
+
+        // Full form.
+        template<typename ParticleBasedArray, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void interpolation(ParticleBasedArray &A, const ParticleArray &particles,
+                                  const ProjectionDensityArray &field,
+                                  T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                  const PeriodicFunction &p,
+                                  const WeightArray &weight, size_t Np) {
+            impl::interpolation(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, Np);
+        }
+
+        // Particle count defaults to particles.shape()[0].
+        template<typename ParticleBasedArray, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void interpolation(ParticleBasedArray &A, const ParticleArray &particles,
+                                  const ProjectionDensityArray &field,
+                                  T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                  const PeriodicFunction &p,
+                                  const WeightArray &weight) {
+            impl::interpolation(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, particles.shape()[0]);
+        }
+
+        // Unit (two-dimensional) weights, all particles. WeightArray is not
+        // used by this overload; it is defaulted because it cannot be deduced.
+        template<typename ParticleBasedArray, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray = CIC_Tools::DefaultWeightDim2,
+                 typename PeriodicFunction = CIC_Tools::Periodic>
+        static void interpolation(ParticleBasedArray &A, const ParticleArray &particles,
+                                  const ProjectionDensityArray &field,
+                                  T Lx, T Ly, T Lz, int N0, int N1, int N2,
+                                  const PeriodicFunction &p) {
+            impl::interpolation(A, particles, field, Lx, Ly, Lz, N0, N1, N2, p,
+                                CIC_Tools::DefaultWeightDim2(), particles.shape()[0]);
+        }
+
+        // Periodic boundaries, unit (two-dimensional) weights, all particles.
+        // Neither WeightArray nor PeriodicFunction is used; both are
+        // defaulted because they cannot be deduced.
+        template<typename ParticleBasedArray, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray = CIC_Tools::DefaultWeightDim2,
+                 typename PeriodicFunction = CIC_Tools::Periodic>
+        static void interpolation(ParticleBasedArray &A, const ParticleArray &particles,
+                                  const ProjectionDensityArray &field,
+                                  T Lx, T Ly, T Lz, int N0, int N1, int N2) {
+            impl::interpolation(A, particles, field, Lx, Ly, Lz, N0, N1, N2,
+                                CIC_Tools::Periodic(N0, N1, N2),
+                                CIC_Tools::DefaultWeightDim2(), particles.shape()[0]);
+        }
+
+        // ---------------------------------------------------------------
+        // adjoint of the projection; always uses CIC_Tools::DefaultWeight
+        // ---------------------------------------------------------------
+
+        // Full form: explicit boundary functor and particle count.
+        template<typename ParticleArray, typename GradientArray, typename ProjectionDensityArray, typename PeriodicFunction>
+        static void adjoint(const ParticleArray& particles, ProjectionDensityArray& density,
+                            GradientArray& adjoint_gradient,
+                            T Lx, T Ly, T Lz,
+                            int N0, int N1, int N2,
+                            const PeriodicFunction& p,
+                            T nmean, size_t Np) {
+            impl::adjoint(particles, density, adjoint_gradient, CIC_Tools::DefaultWeight(),
+                          Lx, Ly, Lz, N0, N1, N2, p, nmean, Np);
+        }
+
+        // Particle count defaults to particles.shape()[0].
+        template<typename ParticleArray, typename GradientArray, typename ProjectionDensityArray, typename PeriodicFunction>
+        static void adjoint(const ParticleArray& particles, ProjectionDensityArray& density,
+                            GradientArray& adjoint_gradient,
+                            T Lx, T Ly, T Lz,
+                            int N0, int N1, int N2,
+                            const PeriodicFunction& p,
+                            T nmean) {
+            impl::adjoint(particles, density, adjoint_gradient, CIC_Tools::DefaultWeight(),
+                          Lx, Ly, Lz, N0, N1, N2, p, nmean, particles.shape()[0]);
+        }
+
+        // Periodic boundaries, all particles.
+        template<typename ParticleArray, typename GradientArray, typename ProjectionDensityArray>
+        static void adjoint(const ParticleArray& particles, ProjectionDensityArray& density,
+                            GradientArray& adjoint_gradient,
+                            T Lx, T Ly, T Lz,
+                            int N0, int N1, int N2,
+                            T nmean) {
+            impl::adjoint(particles, density, adjoint_gradient, CIC_Tools::DefaultWeight(),
+                          Lx, Ly, Lz, N0, N1, N2, CIC_Tools::Periodic(N0, N1, N2), nmean,
+                          particles.shape()[0]);
+        }
+
+        // ---------------------------------------------------------------
+        // adjoints of the interpolations (full form only)
+        // ---------------------------------------------------------------
+
+        template<typename ParticleBasedScalar, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void adjoint_interpolation_scalar(
+            int axis, ParticleBasedScalar &A, const ParticleArray &particles,
+            const ProjectionDensityArray &field,
+            T Lx, T Ly, T Lz, int N0, int N1, int N2,
+            const PeriodicFunction &p, const WeightArray &weight, size_t Np) {
+            impl::adjoint_interpolation_scalar(axis, A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, Np);
+        }
+
+        template<typename ParticleBasedArray, typename ParticleArray, typename ProjectionDensityArray,
+                 typename WeightArray, typename PeriodicFunction>
+        static void adjoint_interpolation(
+            int axis, ParticleBasedArray &A, const ParticleArray &particles,
+            const ProjectionDensityArray &field,
+            T Lx, T Ly, T Lz, int N0, int N1, int N2,
+            const PeriodicFunction &p,
+            const WeightArray &weight, size_t Np) {
+            impl::adjoint_interpolation(axis, A, particles, field, Lx, Ly, Lz, N0, N1, N2, p, weight, Np);
+        }
+
+    };
+
+}
+
+#endif
+
+// ARES TAG: authors_num = 3
+// ARES TAG: name(0) = Jens Jasche
+// ARES TAG: year(0) = 2009-2019
+// ARES TAG: email(0) = jens.jasche@fysik.su.se
+// ARES TAG: name(1) = Guilhem Lavaux
+// ARES TAG: year(1) = 2014-2019
+// ARES TAG: email(1) = guilhem.lavaux@iap.fr
+// ARES TAG: name(2) = Florent Leclercq
+// ARES TAG: year(2) = 2019
+// ARES TAG: email(2) = florent.leclercq@polytechnique.org
--- a/libLSS/physics/modified_ngp.hpp
+++ b/libLSS/physics/modified_ngp.hpp
@ -0,0 +1,454 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/modified_ngp.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_PHYSICS_MODIFIED_NGP_HPP
+#define __LIBLSS_PHYSICS_MODIFIED_NGP_HPP
+
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include <boost/multi_array.hpp>
+#include "libLSS/physics/generic_cic.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/tools/compiler_tools.hpp"
+
+namespace LibLSS {
+
+  template <typename T, typename SubgridSpec, bool ignore_overflow>
+  struct ModifiedNGP_impl {
+    // Scalar type used for lengths and weights.
+    typedef T Type;
+    // Number of extra planes required in case of MPI
+    static const int MPI_PLANE_LEAKAGE = 1;
+    // When true, projection() and adjoint() log an error for indices that
+    // fall outside the density array.
+    static const bool EXTRA_CHECK = true;
+    typedef boost::multi_array<T, 3> DensityArray;
+
+    // Virtual grid spacing.
+    // Particles are treated as little boxes of size dx*subres:
+    //   subres = 1  corresponds to (and should reproduce) CIC
+    //   subres -> 0 approaches NGP
+    static constexpr double subres = SubgridSpec::value;
+
+    // Adds `value` to density[ix][iy][iz] when all three indices lie inside
+    // the half-open ranges [bounds[d][0], bounds[d][1]); does nothing
+    // otherwise.
+    template <typename A>
+    static inline void _safe_set(
+        A &&density, size_t const ix, size_t const iy, size_t const iz,
+        ssize_t const bounds[3][2], T const &value) {
+      bool const x_outside = ix < bounds[0][0] || !(ix < bounds[0][1]);
+      bool const y_outside = iy < bounds[1][0] || !(iy < bounds[1][1]);
+      bool const z_outside = iz < bounds[2][0] || !(iz < bounds[2][1]);
+
+      if (x_outside || y_outside || z_outside)
+        return;
+
+      density[ix][iy][iz] += value;
+    }
+
+    // This function implements the particle projection to a grid.
+    // Each particle deposits its weight on up to eight cells: the cell
+    // (ix, iy, iz) holding the lower corner of the particle box and its
+    // +1 neighbours along each axis, with trilinear-style fractions
+    // (q = share of the lower cell, r = share of the upper cell).
+    // Arguments:
+    //   - particles (2d array: Nx3)
+    //   - density (3d array: N0xN1xN2, or slice thereof); values are
+    //     accumulated with +=, the array is not cleared here
+    //   - Lx, Ly, Lz: physical size
+    //   - N0, N1, N2: grid size
+    //   - p: a function applying optional periodic boundary enforcement (depends on MPI for ghost plane)
+    //   - weight: per-particle weight functor, maybe returning only "1"
+    //   - Np: number of particles to project
+    // When ignore_overflow is true, contributions to cells outside the
+    // density array are silently dropped; otherwise the cells are written
+    // unconditionally after optional diagnostics.
+    template <
+        typename ParticleArray, typename ProjectionDensityArray,
+        typename WeightArray, typename PeriodicFunction>
+    static void projection(
+        const ParticleArray &particles, ProjectionDensityArray &density, T Lx,
+        T Ly, T Lz, int N0, int N1, int N2, const PeriodicFunction &p,
+        const WeightArray &weight, size_t Np) {
+      ConsoleContext<LOG_DEBUG> ctx("Modified NGP projection");
+
+      // Conversion factors from physical position to grid units.
+      T inv_dx = N0 / Lx;
+      T inv_dy = N1 / Ly;
+      T inv_dz = N2 / Lz;
+
+      // Index range actually covered by the density array (half-open).
+      ssize_t minX = density.index_bases()[0];
+      ssize_t minY = density.index_bases()[1];
+      ssize_t minZ = density.index_bases()[2];
+      ssize_t maxX = minX + density.shape()[0];
+      ssize_t maxY = minY + density.shape()[1];
+      ssize_t maxZ = minZ + density.shape()[2];
+
+      ssize_t const bounds[3][2] = {{minX, maxX}, {minY, maxY}, {minZ, maxZ}};
+
+      ctx.format("minX=%d, maxX=%d, N0=%d", minX, maxX, N0);
+      ctx.format("minY=%d, maxY=%d, N1=%d", minY, maxY, N1);
+      ctx.format("minZ=%d, maxZ=%d, N2=%d", minZ, maxZ, N2);
+
+      for (long i = 0; i < Np; i++) {
+
+        //divide particle positions by target grid-size
+        //Note: all integer numbers are therefore defined at target resolution
+        T x = particles[i][0] * inv_dx;
+        T y = particles[i][1] * inv_dy;
+        T z = particles[i][2] * inv_dz;
+
+        //Note, we want to find the nearest lower left corner of a voxel that fully contains
+        //the box-shaped particle.
+        //We therefore have to find the nearest voxel for the lower left corner of the particle box.
+        // NOTE(review): the floor result is cast to size_t, so this assumes
+        // the shifted coordinate is non-negative - confirm with callers.
+
+        size_t ix = (size_t)std::floor(
+            x +
+            0.5 * (1. - subres)); //the offset of half a subresolution factor
+        size_t iy = (size_t)std::floor(
+            y +
+            0.5 *
+                (1. -
+                 subres)); //ensures the edges of the particle cloud are within
+        size_t iz = (size_t)std::floor(
+            z + 0.5 * (1. - subres)); //the lower voxel boundaries
+        //Note, it can be easily seen that for subres=1 the CIC scheme is recovered.
+
+        //now calculate distances before wrap-around
+        //if particle is fully contained in voxel assign the total mass
+        T rx = 0.;
+        T qx = 1.;
+
+        T ry = 0.;
+        T qy = 1.;
+
+        T rz = 0.;
+        T qz = 1.;
+        // The divisions by subres below are guarded by subres > 0, but the
+        // compiler may still warn when subres is the constant 0.
+        // clang-format off
+DISABLE_WARN_DIV_BY_ZERO;
+        // clang-format on
+        //if fraction of particle is contained in the next cell assign a fraction of mass
+        double dd = x - ix - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          rx = dd / subres;
+          qx = 1. - rx;
+        }
+
+        dd = y - iy - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          ry = dd / subres;
+          qy = 1. - ry;
+        }
+
+        dd = z - iz - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          rz = dd / subres;
+          qz = 1. - rz;
+        }
+        // clang-format off
+ENABLE_WARN_DIV_BY_ZERO;
+        // clang-format on
+
+        //we need to check for periodicity
+        p(ix, iy, iz);
+
+        //if the particle is fully contained within a voxel
+        //then we can attribute its entire mass to this bin.
+        //otherwise a fraction of mass will be assigned to
+        //the next bin.
+
+        //find next cells
+        size_t jx = (ix + 1);
+        size_t jy = (iy + 1);
+        size_t jz = (iz + 1);
+
+        //check for periodicity
+        p(jx, jy, jz);
+
+        double w = weight[i];
+
+        if (!ignore_overflow) {
+          // Diagnostics only: out-of-range indices are reported but the
+          // writes below still happen.
+          // NOTE(review): there is no check on iy >= maxY nor on the z
+          // indices here - confirm whether that is intended.
+          if (EXTRA_CHECK && jx >= maxX) {
+            Console::instance().print<LOG_ERROR>(
+                boost::format("Overflow at ix=%d, jx=%d (maxX=%d)") % ix % jx %
+                maxX);
+          }
+          if (EXTRA_CHECK && ix < minX) {
+            Console::instance().print<LOG_ERROR>(
+                boost::format("Underflow at ix=%d, jx=%d") % ix % jx);
+          }
+          if (EXTRA_CHECK && ix >= maxX) {
+            Console::instance().print<LOG_ERROR>(
+                boost::format("Overflow at ix=%d, jx=%d with x=%g") % ix % jx %
+                x);
+          }
+          if (EXTRA_CHECK && jy >= maxY) {
+            Console::instance().print<LOG_ERROR>(
+                boost::format("Overflow at iy=%d, jy=%d (maxY=%d)") % iy % jy %
+                maxY);
+          }
+          if (EXTRA_CHECK && iy < minY) {
+            Console::instance().print<LOG_ERROR>(
+                boost::format("Underflow at iy=%d, jy=%d") % iy % jy);
+          }
+          // Deposit the weighted fractions on the eight neighbouring cells.
+          density[ix][iy][iz] += (qx) * (qy) * (qz)*w;
+          density[ix][iy][jz] += (qx) * (qy) * (rz)*w;
+          density[ix][jy][iz] += (qx) * (ry) * (qz)*w;
+          density[ix][jy][jz] += (qx) * (ry) * (rz)*w;
+          density[jx][iy][iz] += (rx) * (qy) * (qz)*w;
+          density[jx][iy][jz] += (rx) * (qy) * (rz)*w;
+          density[jx][jy][iz] += (rx) * (ry) * (qz)*w;
+          density[jx][jy][jz] += (rx) * (ry) * (rz)*w;
+        } else {
+          // Same eight deposits, each one skipped when its cell lies
+          // outside `bounds`.
+          _safe_set(density, ix, iy, iz, bounds, qx * qy * qz * w);
+          _safe_set(density, ix, iy, jz, bounds, qx * qy * rz * w);
+          _safe_set(density, ix, jy, iz, bounds, qx * ry * qz * w);
+          _safe_set(density, ix, jy, jz, bounds, qx * ry * rz * w);
+
+          _safe_set(density, jx, iy, iz, bounds, rx * qy * qz * w);
+          _safe_set(density, jx, iy, jz, bounds, rx * qy * rz * w);
+          _safe_set(density, jx, jy, iz, bounds, rx * ry * qz * w);
+          _safe_set(density, jx, jy, jz, bounds, rx * ry * rz * w);
+        }
+      }
+    }
+
+    // Accumulates into adj_gradient[i][axis] the derivative, with respect to
+    // the particle coordinate along `axis`, of the eight-cell weighted sum of
+    // `density`, multiplied by global_w.
+    //
+    // (ix, iy, iz) / (jx, jy, jz) are the lower / upper cell indices and
+    // r*, q* the upper / lower cell fractions for particle i. Along `axis`
+    // the fractions are replaced by their derivatives (+1/subres and
+    // -1/subres when the particle overlaps the upper cell, 0 otherwise);
+    // the other two axes keep their fractions.
+    template <typename GradientArray, typename ProjectionDensityArray>
+    static inline void __do_gradient(
+        GradientArray &adj_gradient, const ProjectionDensityArray &density,
+        size_t i, int axis, int ix, int iy, int iz, int jx, int jy, int jz,
+        T rx, T ry, T rz, T qx, T qy, T qz, T global_w) {
+
+      // Replace a (upper, lower) fraction pair by its derivative.
+      // Note the derivative of the Heaviside function is zero.
+      auto to_derivative = [](T &upper, T &lower) {
+        if (upper > 0. && subres > 0) {
+          upper = 1. / subres;
+          lower = -1. / subres;
+        } else {
+          upper = 0;
+          lower = 0;
+        }
+      };
+
+      // Any axis value other than 0, 1 or 2 leaves all fractions untouched.
+      if (axis == 0) {
+        to_derivative(rx, qx);
+      } else if (axis == 1) {
+        to_derivative(ry, qy);
+      } else if (axis == 2) {
+        to_derivative(rz, qz);
+      }
+
+      // Kept as one expression so that operand types and summation order
+      // are exactly those of the original code.
+      double cell_sum = density[ix][iy][iz] * qx * qy * qz +
+                        density[ix][iy][jz] * qx * qy * rz +
+                        density[ix][jy][iz] * qx * ry * qz +
+                        density[ix][jy][jz] * qx * ry * rz +
+                        density[jx][iy][iz] * rx * qy * qz +
+                        density[jx][iy][jz] * rx * qy * rz +
+                        density[jx][jy][iz] * rx * ry * qz +
+                        density[jx][jy][jz] * rx * ry * rz;
+
+      adj_gradient[i][axis] += cell_sum * global_w;
+    }
+
+    template <
+        typename ParticleArray, typename GradientArray,
+        typename ProjectionDensityArray, typename PeriodicFunction,
+        typename WeightArray>
+    static void adjoint(
+        const ParticleArray &particles, ProjectionDensityArray &density,
+        GradientArray &adjoint_gradient, const WeightArray &weight, T Lx, T Ly,
+        T Lz, int N0, int N1, int N2, const PeriodicFunction &p, T nmean,
+        size_t Np) {
+      ConsoleContext<LOG_DEBUG> ctx("Modified NGP adjoint-projection");
+
+      T inv_dx = N0 / Lx;
+      T inv_dy = N1 / Ly;
+      T inv_dz = N2 / Lz;
+      T inv_nmean = T(1) / nmean;
+      ssize_t minX = density.index_bases()[0];
+      ssize_t minY = density.index_bases()[1];
+      ssize_t minZ = density.index_bases()[2];
+      ssize_t maxX = minX + density.shape()[0];
+      ssize_t maxY = minY + density.shape()[1];
+      ssize_t maxZ = minZ + density.shape()[2];
+
+      ctx.print(
+          boost::format(
+              "Number of particles = %d (array is %d), minX=%d maxX=%d") %
+          Np % particles.shape()[0] % minX % maxX);
+      ctx.print(
+          boost::format("Adjoint gradient = %d") % adjoint_gradient.shape()[0]);
+
+#pragma omp parallel for schedule(static)
+      for (size_t i = 0; i < Np; i++) {
+
+        T x = particles[i][0] * inv_dx;
+        T y = particles[i][1] * inv_dy;
+        T z = particles[i][2] * inv_dz;
+
+        ssize_t ix = (ssize_t)std::floor(x + 0.5 * (1. - subres));
+        ssize_t iy = (ssize_t)std::floor(y + 0.5 * (1. - subres));
+        ssize_t iz = (ssize_t)std::floor(z + 0.5 * (1. - subres));
+
+        T rx = 0.;
+        T qx = 1.;
+
+        T ry = 0.;
+        T qy = 1.;
+
+        T rz = 0.;
+        T qz = 1.;
+
+        double dd = x - ix - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          rx = dd / subres;
+          qx = 1. - rx;
+        }
+
+        dd = y - iy - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          ry = dd / subres;
+          qy = 1. - ry;
+        }
+
+        dd = z - iz - 0.5 * (1 - subres);
+        if (dd > 0. && subres > 0) {
+          rz = dd / subres;
+          qz = 1. - rz;
+        }
+
+        p(ix, iy, iz);
+
+        size_t jx = (ix + 1);
+        size_t jy = (iy + 1);
+        size_t jz = (iz + 1);
+
+        p(jx, jy, jz);
+
+        if (ignore_overflow) {
+          error_helper<ErrorBadState>("Overflow cannot be ignored in adjoint.");
+        }
+
+        if (EXTRA_CHECK && jx >= maxX) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Overflow at ix=%d, jx=%d (maxX adj = %d)") % ix %
+              jx % maxX);
+        }
+        if (EXTRA_CHECK && ix < minX) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Underflow at ix=%d, jx=%d (adj)") % ix % jx);
+        }
+        if (EXTRA_CHECK && jy >= maxY) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Overflow at iy=%d, jy=%d (maxY=%d) adj") % iy %
+              jy % maxY);
+        }
+        if (EXTRA_CHECK && iy < minY) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Underflow at iy=%d, jy=%d adj") % iy % jy);
+        }
+        if (EXTRA_CHECK && jz >= maxZ) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Overflow at iz=%d, jz=%d (maxZ=%d) adj") % iz %
+              jz % maxZ);
+        }
+        if (EXTRA_CHECK && iz < minZ) {
+          Console::instance().print<LOG_ERROR>(
+              boost::format("Underflow at iz=%d, jz=%d adj") % iz % jz);
+        }
+
+        __do_gradient(
+            adjoint_gradient, density, i, 0, ix, iy, iz, jx, jy, jz, rx, ry, rz,
+            qx, qy, qz, inv_nmean * inv_dx);
+        __do_gradient(
+            adjoint_gradient, density, i, 1, ix, iy, iz, jx, jy, jz, rx, ry, rz,
+            qx, qy, qz, inv_nmean * inv_dy);
+        __do_gradient(
+            adjoint_gradient, density, i, 2, ix, iy, iz, jx, jy, jz, rx, ry, rz,
+            qx, qy, qz, inv_nmean * inv_dz);
+      }
+    }
+  };
+
+  // Sub-grid resolution tags. Each tag only carries a compile-time `value`
+  // and is meant to be passed as the `SubgridSpec` template argument of
+  // ModifiedNGP (which defaults to NGPGrid::CIC).
+  namespace NGPGrid {
+    // value = 0
+    struct NGP { static constexpr double value = 0.0; };
+    // value = 1 (default specification of ModifiedNGP)
+    struct CIC { static constexpr double value = 1.0; };
+    // value = 1/2
+    struct Double { static constexpr double value = 0.5; };
+    // value = 1/4
+    struct Quad { static constexpr double value = 0.25; };
+  } // namespace NGPGrid
+
+  // This implements the ModifiedNGP kernel. By default (SubgridSpec =
+  // NGPGrid::CIC) it acts like a CIC, for an additional cost.
+  // It relies on GenericCIC to implement the missing auxiliary functions from
+  // the base functions given in ModifiedNGP_impl.
+  template <
+      typename T, typename SubgridSpec = NGPGrid::CIC,
+      bool ignore_overflow = false>
+  class ModifiedNGP
+      : public GenericCIC<
+            T, ModifiedNGP_impl<T, SubgridSpec, ignore_overflow>> {
+  public:
+    typedef ModifiedNGP_impl<T, SubgridSpec, ignore_overflow> Base;
+    typedef T Type;
+
+    // Number of extra ghost planes required in case of MPI. Only post planes are
+    // supported.
+    // In practice only ONE plane is supported at the moment.
+    static const int MPI_PLANE_LEAKAGE = 1;
+    static const int MPI_NEGATIVE_PLANE_LEAKAGE = 0;
+
+    typedef CIC_Tools::Periodic_MPI Periodic_MPI;
+
+    // This defines the policy of load balancing distribution for MNGP.
+    // This class translates the requirements of slabing by FFTW to particle
+    // positions. As we are still using the ghost plane mechanism to adjust for
+    // edge effects this decision class is required to be able to do correct parallel
+    // projection.
+    // Its task is quite minimal as most of the complexity is in "get_peer" and
+    // load balancing in samplers/borg/pm/particle_distribution.hpp
+    struct Distribution {
+      typedef long LongElt;
+      typedef LibLSS::FFTW_Manager_3d<T> Manager;
+
+      // NOTE(review): this is a reference to the caller's shared_ptr, so the
+      // caller's pointer object must outlive this Distribution.
+      std::shared_ptr<Manager> &force_mgr;
+      size_t f_N0;      // mgr->N0 at construction
+      size_t f_startN0; // mgr->startN0 at construction
+      size_t f_localN0; // mgr->localN0 at construction
+      double L0;        // physical box size along the first axis
+
+      // The two trailing unnamed arguments are accepted and ignored.
+      Distribution(
+          std::shared_ptr<Manager> &mgr, double L0, double = 0, double = 0)
+          : force_mgr(mgr), f_N0(mgr->N0), f_startN0(mgr->startN0),
+            f_localN0(mgr->localN0) {
+        this->L0 = L0;
+        Console::instance().print<LOG_DEBUG>(
+            boost::format(
+                "Initialize particle distribution decider: N0 = %d, L0 = %g") %
+            f_N0 % L0);
+      }
+
+      // Returns the peer, as given by Manager::get_peer, for the plane that
+      // holds the particle at `pos`. Only pos[0] is used; extra arguments
+      // are ignored.
+      template <typename Position, typename... U>
+      inline LongElt operator()(Position &&pos, U &&...) {
+        T x = pos[0] * f_N0 / L0;
+        // Take the modulus in signed arithmetic: `long % size_t` converts a
+        // negative plane index to unsigned first, which only wraps to the
+        // right plane when N0 is a power of two.
+        LongElt const n0 = LongElt(f_N0);
+        LongElt i0 = LongElt(std::floor(x + 0.5 * (1. - Base::subres))) % n0;
+        if (i0 < 0)
+          i0 += n0;
+        LongElt peer = force_mgr->get_peer(i0);
+        //Console::instance().print<LOG_DEBUG>(boost::format("Pos %g, peer = %d") % x % peer);
+        return peer;
+      }
+    };
+  };
+} // namespace LibLSS
+
+#endif
--- a/libLSS/physics/modified_ngp_smooth.hpp
+++ b/libLSS/physics/modified_ngp_smooth.hpp
@ -0,0 +1,366 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/modified_ngp_smooth.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_PHYSICS_SMOOTH_MODIFIED_NGP_HPP
+#define __LIBLSS_PHYSICS_SMOOTH_MODIFIED_NGP_HPP
+
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/physics/generic_cic.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+
+namespace LibLSS {
+
+    template<typename T, typename SubgridSpec>
+    struct SmoothModifiedNGP_impl {
+        typedef T Type;
+        // Number of extra planes required in case of MPI
+        static const int MPI_PLANE_LEAKAGE = 1;
+        // When true, projection/adjoint log an error for every particle whose
+        // cell indices fall outside the local density array.
+        static const bool EXTRA_CHECK = true;
+
+        // Virtual (sub-grid) spacing, in units of the target cell size.
+        // Particles are assumed to be little boxes of size dx*subres:
+        //   subres = 1 corresponds to CIC (useful for testing: it should reproduce CIC)
+        //   subres -> 0 approaches NGP
+        static constexpr double subres=SubgridSpec::value;
+
+        // Coefficient of the cubic terms used by kernel() and adjoint(); it is zero when subres = 1.
+        static constexpr double C0 = (1. - subres)/6.;
+
+        // Weight given to the lower cell along one axis, as a function of
+        // delta = (cell centre) - (particle position), in cell units.
+        // The complementary weight 1 - kernel(delta) goes to the next cell.
+        //   |delta| <  subres/2 : central polynomial 0.5 + a - C0*(2a)^3, a = delta/subres
+        //   delta  >=  subres/2 : upper tail, reaches 1 at delta = 0.5
+        //   delta  <= -subres/2 : lower tail, reaches 0 at delta = -0.5
+        // The three pieces agree in value at |a| = 0.5.
+        // NOTE(review): subres == 0 forces a = 0 and always returns 0.5; no
+        // visible SubgridSpec uses that value.
+        static inline T kernel(T delta) {
+            double const a = subres > 0 ? delta/subres : 0;
+            if (a < 0.5 && a > -0.5) {
+               return 0.5 + (a  - CosmoTool::cube(2*a) * C0);
+            }
+            // a == 0.5 belongs to the upper tail (it used to fall through to
+            // the lower-tail formula).
+            if (a >= 0.5) {
+                // For subres == 1 the tail has zero width: return its limit
+                // instead of evaluating 0 * (x/0), which is NaN.
+                if (subres >= 1)
+                    return 1;
+                return 1 - 8 * C0 * CosmoTool::cube((0.5 - delta)/(1-subres));
+            } else {
+                if (subres >= 1)
+                    return 0;
+                return 8 * C0 * CosmoTool::cube((0.5 + delta)/(1-subres));
+            }
+        }
+
+        // Derivative of kernel() with respect to delta (delta in cell units).
+        //   |delta| <  subres/2 : (1 - 6*C0*(2a)^2)/subres, a = delta/subres
+        //   otherwise           : quadratic tails vanishing at |delta| = 0.5
+        // All pieces evaluate to 1 at |a| = 0.5.
+        static inline T adjoint(T delta) {
+            double const a = subres > 0 ? delta/subres : 0;
+            if (a < 0.5 && a > -0.5) {
+               return (1 - (6*C0)*CosmoTool::square(2*a))/subres;
+            }
+            // For subres == 1 the tails have zero width and are only reached
+            // at |a| = 0.5, where the derivative is 1. Returning it directly
+            // avoids the 0/0 of the tail formula.
+            if (subres >= 1) {
+                return 1;
+            }
+            // a == 0.5 belongs to the upper tail (it used to fall through to
+            // the lower-tail formula).
+            if (a >= 0.5) {
+                return (24 * C0/(1-subres)) * CosmoTool::square((0.5 - delta)/(1-subres));
+            } else {
+                return (24 * C0/(1-subres)) * CosmoTool::square((0.5 + delta)/(1-subres));
+            }
+        }
+
+
+        // This function implements the particle projection to a grid using the
+        // smooth kernel(): each particle adds to the 8 cells (ix|jx, iy|jy, iz|jz).
+        // Arguments:
+        //   - particles (2d array: Nx3)
+        //   - density (3d array: N0xN1xN2, or slice thereof), accumulated in place
+        //   - Lx, Ly, Lz: physical size
+        //   - N0, N1, N2: grid size
+        //   - p: a function applying optional periodic boundary enforcement (depends on MPI for ghost plane)
+        //   - weight: per-particle weight, read as weight[i] (maybe returning only "1")
+        //   - Np: number of particles to project
+        template<typename ParticleArray, typename ProjectionDensityArray, typename WeightArray,
+                 typename PeriodicFunction >
+        static void projection(const ParticleArray& particles, ProjectionDensityArray& density,
+                               T Lx, T Ly, T Lz,
+                               int N0, int N1, int N2, const PeriodicFunction& p, const WeightArray& weight, size_t Np) {
+            ConsoleContext<LOG_DEBUG> ctx("Modified NGP projection");
+
+            T inv_dx = N0/Lx;
+            T inv_dy = N1/Ly;
+            T inv_dz = N2/Lz;
+
+            // Index range covered by the local density array, used only by the diagnostics below.
+            int minX = density.index_bases()[0];
+            int minY = density.index_bases()[1];
+            int minZ = density.index_bases()[2];
+            int maxX = density.index_bases()[0] + density.shape()[0];
+            int maxY = density.index_bases()[1] + density.shape()[1];
+            int maxZ = density.index_bases()[2] + density.shape()[2];
+
+            ctx.print(boost::format("minX=%d, maxX=%d, N0=%d") % minX % maxX % N0);
+
+            for (size_t i = 0; i < Np; i++) {
+
+                // Divide particle positions by the target grid spacing.
+                // Note: all integer numbers are therefore defined at target resolution.
+                T x = particles[i][0]*inv_dx;
+                T y = particles[i][1]*inv_dy;
+                T z = particles[i][2]*inv_dz;
+
+                // Lower cell of the pair of cells sharing the particle along each axis.
+                // NOTE(review): the cast to size_t assumes non-negative positions.
+                size_t ix = (size_t)std::floor(x);
+                size_t iy = (size_t)std::floor(y);
+                size_t iz = (size_t)std::floor(z);
+
+                T rx, qx;
+                T ry, qy;
+                T rz, qz;
+
+                // delta is taken with respect to the cell centre: delta = ix + 0.5 - x.
+                // q* is the share of the lower cell, r* the share of the next cell.
+                // It must be computed before `p` possibly wraps the indices.
+                qx = kernel((double(ix)-x) + 0.5);
+                rx = 1-qx;
+                qy = kernel((double(iy)-y) + 0.5);
+                ry = 1-qy;
+                qz = kernel((double(iz)-z) + 0.5);
+                rz = 1-qz;
+
+                // we need to check for periodicity
+                p(ix, iy, iz);
+
+                // find next cells
+                size_t jx = (ix+1);
+                size_t jy = (iy+1);
+                size_t jz = (iz+1);
+
+                // check for periodicity
+                p(jx, jy, jz);
+
+                double w = weight[i];
+
+                if (EXTRA_CHECK && jx >= maxX) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Overflow at ix=%d, jx=%d (maxX=%d)") % ix % jx % maxX);
+                }
+                if (EXTRA_CHECK && ix < minX) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Underflow at ix=%d, jx=%d") % ix % jx);
+                }
+                if (EXTRA_CHECK && ix >= maxX) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Overflow at ix=%d, jx=%d with x=%g") % ix % jx % x);
+                }
+                if (EXTRA_CHECK && jy >= maxY) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Overflow at iy=%d, jy=%d (maxY=%d)") % iy % jy % maxY);
+                }
+                if (EXTRA_CHECK && iy < minY) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Underflow at iy=%d, jy=%d") % iy % jy);
+                }
+                // The third axis is diagnosed as well, as the adjoint already does.
+                if (EXTRA_CHECK && jz >= maxZ) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Overflow at iz=%d, jz=%d (maxZ=%d)") % iz % jz % maxZ);
+                }
+                if (EXTRA_CHECK && iz < minZ) {
+                    Console::instance().print<LOG_ERROR>(boost::format("Underflow at iz=%d, jz=%d") % iz % jz);
+                }
+
+                density[ix][iy][iz] += (  qx)*(  qy)*(  qz)*w;
+                density[ix][iy][jz] += (  qx)*(  qy)*(  rz)*w;
+                density[ix][jy][iz] += (  qx)*(  ry)*(  qz)*w;
+                density[ix][jy][jz] += (  qx)*(  ry)*(  rz)*w;
+                density[jx][iy][iz] += (  rx)*(  qy)*(  qz)*w;
+                density[jx][iy][jz] += (  rx)*(  qy)*(  rz)*w;
+                density[jx][jy][iz] += (  rx)*(  ry)*(  qz)*w;
+                density[jx][jy][jz] += (  rx)*(  ry)*(  rz)*w;
+            }
+
+        }
+
+
+        // Accumulates into adj_gradient[i][axis] the contribution of the 8
+        // cells surrounding particle i, for one axis.
+        // Along `axis` the pair of weights (q, r) is replaced by
+        // (-adjoint(d), +adjoint(d)); the other two axes keep the weights
+        // passed by the caller. The result is scaled by global_w.
+        template<typename GradientArray, typename ProjectionDensityArray>
+        static inline void __do_gradient(GradientArray& adj_gradient,
+                                  const ProjectionDensityArray& density,
+                                  size_t i,
+                                  int axis,
+                                  int ix, int iy, int iz,
+                                  int jx, int jy, int jz,
+                                  T dx, T dy, T dz,
+                                  T rx, T ry, T rz, T qx, T qy, T qz, T global_w)
+        {
+            // Swap in the differentiated weights for the requested axis only.
+            if (axis == 0) {
+                qx = -adjoint(dx);
+                rx = -qx;
+            } else if (axis == 1) {
+                qy = -adjoint(dy);
+                ry = -qy;
+            } else if (axis == 2) {
+                qz = -adjoint(dz);
+                rz = -qz;
+            }
+
+            // Kept as one expression so the summation order is unchanged.
+            double const cell_sum =
+                density[ix][iy][iz] * qx * qy * qz +
+                density[ix][iy][jz] * qx * qy * rz +
+                density[ix][jy][iz] * qx * ry * qz +
+                density[ix][jy][jz] * qx * ry * rz +
+                density[jx][iy][iz] * rx * qy * qz +
+                density[jx][iy][jz] * rx * qy * rz +
+                density[jx][jy][iz] * rx * ry * qz +
+                density[jx][jy][jz] * rx * ry * rz;
+
+            adj_gradient[i][axis] += cell_sum*global_w;
+        }
+
+        // Adjoint of projection(): for every particle, accumulates into
+        // adjoint_gradient[i][0..2] the gradient obtained from `density`
+        // through __do_gradient, scaled by (1/nmean) * (N/L) of the axis.
+        // `p` is applied to the lower and upper cell indices, as in
+        // projection(). `weight` is accepted but not used here.
+        template<typename ParticleArray, typename GradientArray, typename ProjectionDensityArray, typename PeriodicFunction, typename WeightArray>
+        static void adjoint(const ParticleArray& particles, ProjectionDensityArray& density,
+                            GradientArray& adjoint_gradient, const WeightArray& weight,
+                            T Lx, T Ly, T Lz,
+                            int N0, int N1, int N2,
+                            const PeriodicFunction& p,
+                            T nmean, size_t Np) {
+            ConsoleContext<LOG_DEBUG> ctx("Modified NGP adjoint-projection");
+
+            // Cells per unit length along each axis.
+            T cells_x = N0/Lx;
+            T cells_y = N1/Ly;
+            T cells_z = N2/Lz;
+            T inv_nmean = T(1)/nmean;
+
+            // Index range of the local density array, used by the diagnostics.
+            int lowX = density.index_bases()[0];
+            int lowY = density.index_bases()[1];
+            int lowZ = density.index_bases()[2];
+            int highX = lowX + density.shape()[0];
+            int highY = lowY + density.shape()[1];
+            int highZ = lowZ + density.shape()[2];
+
+            // Per-axis scaling handed to __do_gradient.
+            T const axis_scale[3] = {inv_nmean*cells_x, inv_nmean*cells_y, inv_nmean*cells_z};
+
+            ctx.print(boost::format("Number of particles = %d (array is %d), minX=%d maxX=%d") % Np %particles.shape()[0] % lowX % highX);
+            ctx.print(boost::format("Adjoint gradient = %d") % adjoint_gradient.shape()[0]);
+
+#pragma omp parallel for schedule(static)
+            for (size_t i = 0; i < Np; i++) {
+
+                // Position in cell units.
+                T x = particles[i][0]*cells_x;
+                T y = particles[i][1]*cells_y;
+                T z = particles[i][2]*cells_z;
+
+                size_t ix = (size_t)std::floor(x);
+                size_t iy = (size_t)std::floor(y);
+                size_t iz = (size_t)std::floor(z);
+
+                // Offset of the cell centre from the particle; must be taken
+                // before `p` possibly wraps the indices.
+                T dx = (double(ix)-x)+0.5;
+                T dy = (double(iy)-y)+0.5;
+                T dz = (double(iz)-z)+0.5;
+
+                // Share of the lower cell (q) and of the next cell (r).
+                T qx = kernel(dx);
+                T rx = 1-qx;
+                T qy = kernel(dy);
+                T ry = 1-qy;
+                T qz = kernel(dz);
+                T rz = 1-qz;
+
+                p(ix, iy, iz);
+
+                // Next cells, derived from the already wrapped lower indices.
+                size_t jx = ix+1;
+                size_t jy = iy+1;
+                size_t jz = iz+1;
+
+                p(jx, jy, jz);
+
+                if (EXTRA_CHECK) {
+                    if (jx >= highX) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Overflow at ix=%d, jx=%d (maxX adj = %d)") % ix % jx % highX);
+                    }
+                    if (ix < lowX) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Underflow at ix=%d, jx=%d (adj)") % ix % jx);
+                    }
+                    if (jy >= highY) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Overflow at iy=%d, jy=%d (maxY=%d) adj") % iy % jy % highY);
+                    }
+                    if (iy < lowY) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Underflow at iy=%d, jy=%d adj") % iy % jy);
+                    }
+                    if (jz >= highZ) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Overflow at iz=%d, jz=%d (maxZ=%d) adj") % iz % jz % highZ);
+                    }
+                    if (iz < lowZ) {
+                        Console::instance().print<LOG_ERROR>(boost::format("Underflow at iz=%d, jz=%d adj") % iz % jz);
+                    }
+                }
+
+                // One gradient component per axis.
+                for (int axis = 0; axis < 3; axis++) {
+                    __do_gradient(adjoint_gradient, density, i, axis, ix, iy, iz, jx, jy, jz, dx, dy, dz, rx, ry, rz, qx, qy, qz, axis_scale[axis]);
+                }
+            }
+
+        }
+
+    };
+
+
+    // Sub-grid resolution tags for SmoothModifiedNGP. Each tag carries the
+    // compile-time `value` that SmoothModifiedNGP_impl reads as `subres`.
+    namespace SmoothNGPGrid {
+      // subres = 1 (default specification)
+      struct CIC {
+        static constexpr double value = 1;
+      };
+      // subres = 0.5
+      struct Double {
+        static constexpr double value = 0.5;
+      };
+      // subres = 0.3
+      // NOTE(review): NGPGrid::Quad uses 0.25; confirm 0.3 is intended here.
+      struct Quad {
+        static constexpr double value = 0.3;
+      };
+    }
+
+
+    // This implements the SmoothModifiedNGP kernel. With the default
+    // SubgridSpec (SmoothNGPGrid::CIC) the sub-grid size is 1.
+    // It relies on GenericCIC to implement the missing auxiliary functions from the base functions
+    // given in SmoothModifiedNGP_impl
+    template<typename T,typename SubgridSpec = SmoothNGPGrid::CIC>
+    class SmoothModifiedNGP: public GenericCIC<T, SmoothModifiedNGP_impl<T,SubgridSpec> > {
+    public:
+        typedef SmoothModifiedNGP_impl<T,SubgridSpec> Base;
+        typedef T Type;
+
+        // Number of extra ghost planes required in case of MPI.
+        // In practice only ONE plane is supported at the moment.
+        static const int MPI_PLANE_LEAKAGE = 1;
+        static const int MPI_NEGATIVE_PLANE_LEAKAGE = 1;
+
+        // Periodic wrapping of cell indices. The second and third indices
+        // are always wrapped; the first index is wrapped only on rank 0.
+        struct Periodic_MPI
+        {
+          bool start;   // true when comm->rank() == 0
+          size_t N0, N1, N2;
+
+          Periodic_MPI(size_t _N0, size_t _N1, size_t _N2, MPI_Communication *comm)
+            : N0(_N0), N1(_N1), N2(_N2) {
+            start = comm->rank() == 0;
+          }
+
+          void operator()(size_t& i, size_t& j, size_t& k) const {
+            // Braces make explicit that only the first index depends on `start`.
+            if (start) {
+              if (i >= N0) i %= N0;
+            }
+            if (j >= N1) j %= N1;
+            if (k >= N2) k %= N2;
+          }
+        };
+
+        // This defines the policy of load balancing distribution for MNGP.
+        // This class translates the requirements of slabing by FFTW to particle
+        // positions. As we are still using the ghost plane mechanism to adjust for
+        // edge effects this decision class is required to be able to do correct parallel
+        // projection.
+        // Its task is quite minimal as most of the complexity is in "get_peer" and
+        // load balancing in samplers/borg/pm/particle_distribution.hpp
+        struct Distribution {
+            typedef long LongElt;
+            typedef LibLSS::FFTW_Manager_3d<T> Manager;
+
+            // NOTE(review): this is a reference to the caller's shared_ptr, so
+            // the caller's pointer object must outlive this Distribution.
+            std::shared_ptr<Manager>& force_mgr;
+            size_t f_N0;      // mgr->N0 at construction
+            size_t f_startN0; // mgr->startN0 at construction
+            size_t f_localN0; // mgr->localN0 at construction
+            double L0;        // physical box size along the first axis
+
+            // The two trailing unnamed arguments are accepted and ignored.
+            Distribution(std::shared_ptr<Manager>& mgr, double L0, double = 0, double = 0)
+                : force_mgr(mgr), f_N0(mgr->N0), f_startN0(mgr->startN0),
+                  f_localN0(mgr->localN0) {
+                  this->L0 = L0;
+                  Console::instance().print<LOG_DEBUG>(boost::format("Initialize particle distribution decisioner: N0 = %d, L0 = %g") % f_N0 % L0);
+            }
+
+            // Returns the peer, as given by Manager::get_peer, for the plane
+            // that holds the particle at `pos`. Only pos[0] is used; extra
+            // arguments are ignored.
+            template<typename Position, typename... U>
+            inline LongElt operator()(Position&& pos, U&&...) {
+                T x = pos[0]*f_N0/L0;
+                // Take the modulus in signed arithmetic: `long % size_t`
+                // converts a negative plane index to unsigned first, which only
+                // wraps to the right plane when N0 is a power of two.
+                LongElt const n0 = LongElt(f_N0);
+                LongElt i0 = LongElt(std::floor(x)) % n0;
+                if (i0 < 0)
+                    i0 += n0;
+                LongElt peer = force_mgr->get_peer(i0);
+                //Console::instance().print<LOG_DEBUG>(boost::format("Pos %g, peer = %d") % x % peer);
+                return peer;
+            }
+        };
+
+    };
+}
+
+#endif
--- a/libLSS/physics/openmp_cic.hpp
+++ b/libLSS/physics/openmp_cic.hpp
@ -0,0 +1,328 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/physics/openmp_cic.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_PHYSICS_OPENMP_CIC_HPP
+#define __LIBLSS_PHYSICS_OPENMP_CIC_HPP
+
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include <boost/multi_array.hpp>
+#include <CosmoTool/omptl/omptl>
+#include <CosmoTool/omptl/omptl_algorithm>
+#include <iostream>
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/physics/generic_cic.hpp"
+
+namespace LibLSS {
+
+  template <typename T>
+  struct OpenMPCloudInCell_impl {
+    typedef T Type;
+    // Number of extra planes required in case of MPI
+    static const int MPI_PLANE_LEAKAGE = 1;
+    // Per-particle array used by projection(): for each particle, the index
+    // of the next particle in the same mesh cell, or -1 at the end of a chain.
+    typedef boost::multi_array<int, 1> ListArray;
+    // Per-cell array used by projection(): head particle of each cell's chain
+    // (-1 when empty). It is updated with __atomic_exchange_n.
+    typedef boost::multi_array<int, 1> AtomicListArray;
+
+    // CIC projection parallelised with OpenMP.
+    // The routine works in three steps:
+    //   1. build, for every mesh cell, a linked list of the particles whose
+    //      lower cell it is (part_mesh = list heads, part_list = next links);
+    //   2. reverse each list;
+    //   3. run 8 passes, one per corner offset (looper0, looper1, looper2),
+    //      each adding the summed corner weight of a cell's particles to
+    //      density at the cell shifted by that offset (after applying `p`).
+    // Arguments:
+    //   - particles: indexed as particles[i][0..2]
+    //   - density: 3d array, accumulated in place
+    //   - Lx, Ly, Lz: physical size; N0, N1, N2: grid size
+    //   - p: periodic functor applied to the target indices
+    //   - weight: per-particle weight, read as weight[i]
+    //   - Np: number of particles
+    // NOTE(review): the cell index is floor(position * N / L) with no wrap or
+    // bound check before indexing part_mesh; this presumably assumes
+    // positions inside [0, L) on every axis -- confirm with callers.
+    // NOTE(review): particle indices are stored as int, so Np is presumably
+    // expected to fit in an int.
+    template <
+        typename ParticleArray, typename ProjectionDensityArray,
+        typename WeightArray, typename PeriodicFunction>
+    static void projection(
+        const ParticleArray &particles, ProjectionDensityArray &density, T Lx,
+        T Ly, T Lz, size_t N0, size_t N1, size_t N2, const PeriodicFunction &p,
+        const WeightArray &weight, size_t Np) {
+      using boost::extents;
+
+      ConsoleContext<LOG_DEBUG> ctx("OpenMP CIC projection");
+
+      // Cells per unit length along each axis.
+      T inv_dx = N0 / Lx;
+      T inv_dy = N1 / Ly;
+      T inv_dz = N2 / Lz;
+
+      // part_mesh: one entry per mesh cell (N0*N1*N2), head of the cell's list.
+      // part_list: one entry per particle, link to the next particle.
+      typedef UninitializedArray<AtomicListArray> U_AtomicListArray;
+      typedef UninitializedArray<ListArray> U_ListArray;
+      U_AtomicListArray part_mesh_p(extents[long(N0) * long(N1) * long(N2)]);
+      U_ListArray part_list_p(extents[Np]);
+      U_AtomicListArray::array_type &part_mesh = part_mesh_p.get_array();
+      U_ListArray::array_type &part_list = part_list_p.get_array();
+      long Nmesh = part_mesh.num_elements();
+
+      {
+        ConsoleContext<LOG_DEBUG> ctx0("initialize arrays");
+        // -1 marks "no particle" in both arrays.
+        array::fill(part_mesh, -1);
+        array::fill(part_list, -1);
+      }
+
+      {
+        ConsoleContext<LOG_DEBUG> ctx0("build mesh list");
+// First build part -> mesh list
+#pragma omp parallel for schedule(static)
+        for (size_t i_part = 0; i_part < Np; i_part++) {
+
+          T x = particles[i_part][0] * inv_dx;
+          T y = particles[i_part][1] * inv_dy;
+          T z = particles[i_part][2] * inv_dz;
+
+          size_t ix = (size_t)std::floor(x);
+          size_t iy = (size_t)std::floor(y);
+          size_t iz = (size_t)std::floor(z);
+
+          // Flattened cell index, last axis fastest.
+          size_t idx = iz + N2 * iy + N2 * N1 * ix;
+
+          // Push this particle at the head of the cell's list; the previous
+          // head (if any) becomes its successor.
+          int initial_elt =
+              __atomic_exchange_n(&part_mesh[idx], i_part, __ATOMIC_RELAXED);
+          if (initial_elt != -1) {
+            part_list[i_part] = initial_elt;
+          }
+        }
+      }
+
+      {
+        ConsoleContext<LOG_DEBUG> ctx0("reverse list");
+
+        // We built the list in the incorrect order, reverse it as fast as we can
+#pragma omp parallel for schedule(dynamic, 10000)
+        for (size_t mid = 0; mid < Nmesh; mid++) {
+          int current_part = part_mesh[mid];
+
+          if (current_part >= 0) {
+            int next_part = part_list[current_part];
+
+            // The old head becomes the tail, then links are flipped one by one.
+            part_list[current_part] = -1;
+            while (next_part != -1) {
+              int p = part_list[next_part];
+              part_list[next_part] = current_part;
+              current_part = next_part;
+              next_part = p;
+            }
+            // The last visited particle is the new head.
+            part_mesh[mid] = current_part;
+          }
+        }
+      }
+
+      {
+        ConsoleContext<LOG_DEBUG> ctx0("projection");
+
+#pragma omp parallel
+        {
+
+          // One pass per corner offset; every thread runs the three outer loops
+          // and shares the iterations of the inner `omp for`.
+          for (int looper0 = 0; looper0 < 2; looper0++) {
+            for (int looper1 = 0; looper1 < 2; looper1++) {
+              for (int looper2 = 0; looper2 < 2; looper2++) {
+
+                int r[3] = {looper0, looper1, looper2};
+
+                // Within a pass each cell adds to a single target cell.
+                // NOTE(review): presumably distinct cells map to distinct
+                // targets after `p`, which is what makes the unsynchronised
+                // += safe -- confirm for the periodic functor in use.
+#pragma omp barrier
+#pragma omp for schedule(dynamic, 10000)
+                for (long mid = 0; mid < Nmesh; mid++) {
+                  // Unflatten the cell index.
+                  int mz = mid % N2;
+                  int my = (mid / N2) % N1;
+                  int mx = (mid / (N2 * N1));
+                  int i_part = part_mesh[mid];
+
+                  T w = 0;
+
+                  // Sum, over the cell's particles, the weight for corner r.
+                  while (i_part != -1) {
+                    T w0 = 1;
+                    T x = particles[i_part][0] * inv_dx;
+                    T y = particles[i_part][1] * inv_dy;
+                    T z = particles[i_part][2] * inv_dz;
+                    T qx = std::floor(x);
+                    T qy = std::floor(y);
+                    T qz = std::floor(z);
+                    T dx = x - qx;
+                    T dy = y - qy;
+                    T dz = z - qz;
+                    // Fractional offset for the far corner, its complement for the near one.
+                    w0 = (r[0] == 1) ? dx : (T(1) - dx);
+                    w0 *= (r[1] == 1) ? dy : (T(1) - dy);
+                    w0 *= (r[2] == 1) ? dz : (T(1) - dz);
+                    w += w0 * weight[i_part];
+                    i_part = part_list[i_part];
+                  }
+
+                  size_t tx = (mx + looper0);
+                  size_t ty = (my + looper1);
+                  size_t tz = (mz + looper2);
+                  p(tx, ty, tz);
+                  density[tx][ty][tz] += w;
+                }
+              }
+            }
+          }
+        }
+        // NOTE(review): this barrier sits after the parallel region has closed.
+#pragma omp barrier
+      }
+    }
+
+    // Gradient of the CIC weights for particle i along `axis`, for a
+    // per-axis weight array (selected when WeightArray::dimensionality == 1).
+    // Along `axis` the interpolation weights (q, r) become (-1, +1); the other
+    // axes use the usual (1 - offset, offset). The result, multiplied by
+    // a_w[axis] and global_w, is stored in adj_gradient[i][axis].
+    // NOTE(review): this overload assigns (=) whereas the scalar-weight
+    // overload accumulates (+=) -- confirm the difference is intended.
+    template <
+        typename GradientArray, typename ProjectionDensityArray,
+        typename WeightArray>
+    static inline
+        typename std::enable_if<WeightArray::dimensionality == 1>::type
+        __do_gradient(
+            GradientArray &adj_gradient, const ProjectionDensityArray &density,
+            WeightArray const &a_w, size_t i, int axis, size_t ix, size_t iy,
+            size_t iz, size_t jx, size_t jy, size_t jz, T x, T y, T z,
+            T global_w) {
+      // Start from the plain interpolation weights on every axis...
+      T rx = x - ix;
+      T qx = 1 - rx;
+      T ry = y - iy;
+      T qy = 1 - ry;
+      T rz = z - iz;
+      T qz = 1 - rz;
+
+      // ...then replace the pair of the differentiated axis by (+1, -1).
+      if (axis == 0) {
+        rx = 1;
+        qx = -1;
+      } else if (axis == 1) {
+        ry = 1;
+        qy = -1;
+      } else if (axis == 2) {
+        rz = 1;
+        qz = -1;
+      }
+
+      // Kept as one expression so the summation order is unchanged.
+      double const cell_sum = density[ix][iy][iz] * qx * qy * qz +
+                              density[ix][iy][jz] * qx * qy * rz +
+                              density[ix][jy][iz] * qx * ry * qz +
+                              density[ix][jy][jz] * qx * ry * rz +
+                              density[jx][iy][iz] * rx * qy * qz +
+                              density[jx][iy][jz] * rx * qy * rz +
+                              density[jx][jy][iz] * rx * ry * qz +
+                              density[jx][jy][jz] * rx * ry * rz;
+
+      adj_gradient[i][axis] = a_w[axis] * cell_sum * global_w;
+    }
+
+    // Gradient of the CIC weights for particle i along `axis`, for a scalar
+    // particle weight a_w.
+    // Along `axis` the interpolation weights (q, r) become (-1, +1); the other
+    // axes use the usual (1 - offset, offset). The result, multiplied by a_w
+    // and global_w, is added to adj_gradient[i][axis].
+    template <typename GradientArray, typename ProjectionDensityArray>
+    static inline void __do_gradient(
+        GradientArray &adj_gradient, const ProjectionDensityArray &density,
+        T a_w, size_t i, int axis, size_t ix, size_t iy, size_t iz, size_t jx,
+        size_t jy, size_t jz, T x, T y, T z, T global_w) {
+      // Start from the plain interpolation weights on every axis...
+      T rx = x - ix;
+      T qx = 1 - rx;
+      T ry = y - iy;
+      T qy = 1 - ry;
+      T rz = z - iz;
+      T qz = 1 - rz;
+
+      // ...then replace the pair of the differentiated axis by (+1, -1).
+      if (axis == 0) {
+        rx = 1;
+        qx = -1;
+      } else if (axis == 1) {
+        ry = 1;
+        qy = -1;
+      } else if (axis == 2) {
+        rz = 1;
+        qz = -1;
+      }
+
+      // Kept as one expression so the summation order is unchanged.
+      double const cell_sum = density[ix][iy][iz] * qx * qy * qz +
+                              density[ix][iy][jz] * qx * qy * rz +
+                              density[ix][jy][iz] * qx * ry * qz +
+                              density[ix][jy][jz] * qx * ry * rz +
+                              density[jx][iy][iz] * rx * qy * qz +
+                              density[jx][iy][jz] * rx * qy * rz +
+                              density[jx][jy][iz] * rx * ry * qz +
+                              density[jx][jy][jz] * rx * ry * rz;
+
+      adj_gradient[i][axis] += a_w * cell_sum * global_w;
+    }
+
+    // Adjoint of the CIC projection: for every particle whose lower cell lies
+    // inside the local density array, updates adjoint_gradient[i][0..2]
+    // through __do_gradient with the scaling (N/L of the axis) / nmean.
+    // Particles whose lower cell is outside the array are skipped.
+    // `p` is applied to the upper cell indices only.
+    template <
+        typename ParticleArray, typename ProjectionDensityArray,
+        typename GradientArray, typename PeriodicFunction, typename WeightArray>
+    static void adjoint(
+        const ParticleArray &particles, ProjectionDensityArray &density,
+        GradientArray &adjoint_gradient, const WeightArray &w, T Lx, T Ly, T Lz,
+        size_t N0, size_t N1, size_t N2, const PeriodicFunction &p, T nmean,
+        size_t Np) {
+      ConsoleContext<LOG_DEBUG> ctx("Classic CIC adjoint-projection");
+
+      // Cells per unit length along each axis.
+      T cells_x = N0 / Lx;
+      T cells_y = N1 / Ly;
+      T cells_z = N2 / Lz;
+      T inv_nmean = 1 / nmean;
+
+      // Index range of the local density array.
+      size_t lowX = density.index_bases()[0];
+      size_t lowY = density.index_bases()[1];
+      size_t lowZ = density.index_bases()[2];
+      size_t highX = density.index_bases()[0] + density.shape()[0];
+      size_t highY = density.index_bases()[1] + density.shape()[1];
+      size_t highZ = density.index_bases()[2] + density.shape()[2];
+
+      // Per-axis scaling handed to __do_gradient.
+      T const axis_scale[3] = {
+          cells_x * inv_nmean, cells_y * inv_nmean, cells_z * inv_nmean};
+
+#pragma omp parallel for schedule(static)
+      for (long i = 0; i < Np; i++) {
+
+        // Position in cell units.
+        T x = particles[i][0] * cells_x;
+        T y = particles[i][1] * cells_y;
+        T z = particles[i][2] * cells_z;
+
+        // Lower cell and its upper neighbour on each axis.
+        size_t ix = (size_t)std::floor(x);
+        size_t iy = (size_t)std::floor(y);
+        size_t iz = (size_t)std::floor(z);
+
+        size_t jx = ix + 1;
+        size_t jy = iy + 1;
+        size_t jz = iz + 1;
+
+        p(jx, jy, jz);
+
+        bool const inside = ix >= lowX && ix < highX && iy >= lowY &&
+                            iy < highY && iz >= lowZ && iz < highZ;
+        if (!inside)
+          continue;
+
+        // One gradient component per axis.
+        for (int axis = 0; axis < 3; axis++) {
+          __do_gradient(
+              adjoint_gradient, density, w[i], i, axis, ix, iy, iz, jx, jy, jz,
+              x, y, z, axis_scale[axis]);
+        }
+      }
+    }
+  };
+
+  // Public OpenMP CIC kernel: GenericCIC specialised with
+  // OpenMPCloudInCell_impl, plus the MPI-related policy types and constants.
+  template <typename T>
+  class OpenMPCloudInCell : public GenericCIC<T, OpenMPCloudInCell_impl<T>> {
+  public:
+    using Type = T;
+
+    // Number of extra planes required in case of MPI
+    static const int MPI_PLANE_LEAKAGE = 1;
+    static const int MPI_NEGATIVE_PLANE_LEAKAGE = 0;
+
+    // Particle distribution and periodicity policies shared with the other CIC kernels.
+    using Distribution = CIC_Distribution<T>;
+    using Periodic_MPI = CIC_Tools::Periodic_MPI;
+  };
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/samplers/ares/ares_bias.hpp
+++ b/libLSS/samplers/ares/ares_bias.hpp
@ -0,0 +1,42 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/ares_bias.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_ARES_BIAS_HPP
+#define __LIBLSS_ARES_BIAS_HPP
+
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include <boost/format.hpp>
+
+namespace LibLSS {
+  namespace ARES {
+    /**
+     * Return a mutable reference to element 0 of the 1d state array
+     * named "galaxy_bias_<c>".
+     */
+    inline double& extract_bias(MarkovState& state, int c)
+    {
+      auto& bias_array =
+          *state.get<ArrayType1d>(boost::format("galaxy_bias_%d") % c)->array;
+      return bias_array[0];
+    }
+    
+    /**
+     * Make sure the state array "galaxy_bias_<c>" has at least as many
+     * entries as init_a. When it is shorter it is resized to
+     * init_a.size() and the newly created slots are filled from the
+     * matching positions of init_a; otherwise nothing is touched.
+     */
+    template<typename InitializerArray>
+    void ensure_bias_size(MarkovState& state, unsigned int c, const InitializerArray& init_a)
+    {
+      auto& bias_array =
+          *state.get<ArrayType1d>(boost::format("galaxy_bias_%d") % c)->array;
+      const size_t current_size = bias_array.size();
+      const size_t wanted_size = init_a.size();
+
+      // Already large enough: leave the stored values alone.
+      if (current_size >= wanted_size)
+        return;
+
+      bias_array.resize(boost::extents[wanted_size]);
+      for (size_t k = current_size; k < wanted_size; ++k)
+        bias_array[k] = init_a[k];
+    }
+
+  }
+}
+
+#endif
--- a/libLSS/samplers/ares/gibbs_messenger.cpp
+++ b/libLSS/samplers/ares/gibbs_messenger.cpp
@ -0,0 +1,536 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/gibbs_messenger.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <Eigen/Core>
+#include <cmath>
+#include <boost/format.hpp>
+#include <CosmoTool/fourier/fft/fftw_calls.hpp>
+#include "libLSS/samplers/core/random_number.hpp"
+#include "libLSS/samplers/ares/gibbs_messenger.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/samplers/ares/ares_bias.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/fused_assign.hpp"
+#include "libLSS/tools/array_tools.hpp"
+
+using namespace LibLSS;
+using boost::format;
+using boost::extents;
+using LibLSS::ARES::extract_bias;
+
+// Index range used below to build multi_array extents whose first axis
+// starts at startN0 instead of 0.
+typedef boost::multi_array_types::extent_range range;
+
+// Eigen array view over an existing buffer of doubles, declared with the
+// Eigen::Aligned option.
+typedef Eigen::Map<Eigen::ArrayXd, Eigen::Aligned> MappedArray;
+
+/* (data,s) -> t sampler
+ */
+
+// Store the communicator; the FFT manager is created later by initialize().
+// Initializers are listed in member declaration order.
+MessengerSampler::MessengerSampler(MPI_Communication * comm_)
+    : constrainedGeneration(true),
+      comm(comm_),
+      mgr(nullptr)
+{
+}
+
+// Release the FFT manager created by initialize(), if any.
+MessengerSampler::~MessengerSampler()
+{
+  // delete on a null pointer is a no-op, so no explicit test is needed.
+  delete mgr;
+}
+
+// Restoring uses exactly the same set-up path as a fresh initialization.
+void MessengerSampler::restore(MarkovState& state)
+{
+    this->initialize(state);
+}
+
+/**
+ * Set up the messenger sampler.
+ *
+ * Reads the grid dimensions (N0, N1, N2), the number of modes (NUM_MODES)
+ * and the random generator from the state, builds an FFT manager to obtain
+ * the local slab [startN0, startN0+localN0), allocates the messenger field,
+ * the messenger mask and the data field on that slab, and registers them
+ * (together with the scalar "messenger_tau") in the state.
+ */
+void MessengerSampler::initialize(MarkovState& state)
+{
+    ArrayType *messenger;
+    Console& cons = Console::instance();
+
+    cons.print<LOG_INFO>("Initialize Messenger sampler");
+    cons.indent();
+
+    N0 = state.get<SLong>("N0")->value;
+    N1 = state.get<SLong>("N1")->value;
+    N2 = state.get<SLong>("N2")->value;
+
+    // The manager is only used here to learn the slab decomposition; it is
+    // released in the destructor.
+    mgr = new FFTMgr(N0, N1, N2, comm);
+
+    startN0 = mgr->startN0;
+    localN0 = mgr->localN0;
+
+    Ntot = N0*N1*N2;
+    localNtot = localN0*N1*N2;
+
+    N_k = state.get<SLong>("NUM_MODES")->value;
+
+    rng = state.get<RandomGen>("random_generator");
+
+    cons.print<LOG_DEBUG>("Allocating messenger field");
+    messenger = new ArrayType(extents[range(startN0,startN0+localN0)][N1][N2]);
+    messenger->setRealDims(ArrayDimension(N0, N1, N2));
+    cons.print<LOG_DEBUG>(format("Allocated messenger_field %p") % messenger->array->data());
+    // This message used to repeat "Allocating messenger field", which made
+    // the debug log ambiguous about which array was being created.
+    cons.print<LOG_DEBUG>("Allocating messenger mask");
+    messenger_mask = new ArrayType(extents[range(startN0,startN0+localN0)][N1][N2]);
+    messenger_mask->setRealDims(ArrayDimension(N0, N1, N2));
+    cons.print<LOG_DEBUG>("Allocating mixed data field");
+    data_field = new ArrayType(extents[range(startN0,startN0+localN0)][N1][N2]);
+    data_field->setRealDims(ArrayDimension(N0, N1, N2));
+
+    // Hand the freshly allocated elements over to the state under their
+    // public names.
+    state.newElement("messenger_field", messenger);
+    state.newElement("messenger_mask", messenger_mask);
+    state.newElement("messenger_tau", messenger_tau = new SDouble());
+    state.newElement("data_field", data_field);
+
+    cons.unindent();
+    cons.print<LOG_INFO>("Done");
+}
+
+/**
+ * One sampling step of the messenger sampler.
+ *
+ * In constrained mode (the default) the messenger field is redrawn cell by
+ * cell from the signal field, the data field, the mask W and tau.
+ * In mock mode (constrainedGeneration == false) the data field is instead
+ * generated from the current messenger field.
+ *
+ * Exactly one Gaussian deviate is consumed per local cell in both modes,
+ * whatever the value of W in that cell.
+ */
+void MessengerSampler::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG> ctx("MessengerSampler::sample");
+    ArrayType& s_field = static_cast<ArrayType&>(state["s_field"]);
+    ArrayType::ArrayType& m_field = *state.get<ArrayType>("messenger_field")->array;
+    ArrayType& data_field = static_cast<ArrayType&>(state["data_field"]);
+    // We need the 3d messenger mask/window
+    ArrayType& W = *messenger_mask;
+    // Scalar registered as "messenger_tau" by initialize()
+    SDouble& tau = *messenger_tau;
+    double sqrt_tau = std::sqrt(tau);
+
+    if (constrainedGeneration) {
+#pragma omp parallel
+{
+        // Fetched once per thread inside the parallel region, then raw
+        // pointers to the flat storage of each array.
+        auto &rng_g = rng->get();
+        const auto &W_tmp = W.array->data();
+        const auto &s_tmp = s_field.array->data();
+        const auto &d_tmp = data_field.array->data();
+        const auto &m_tmp = m_field.data();
+#pragma omp for schedule(static)
+        for (long i = 0; i < localNtot; i++) {
+            double A = rng_g.gaussian();
+            double Wi = W_tmp[i];
+            double si = s_tmp[i];
+            double di = d_tmp[i];
+            double mu, sigma;
+
+            if (Wi > 0) {
+                // Weighted combination of signal and data.
+                mu = (si * Wi + tau * di) / (Wi + tau);
+                sigma = std::sqrt( (Wi*tau) / (Wi + tau) );
+            } else if (Wi < 0){
+                // Negative mask value: the data is ignored, draw around the
+                // signal with variance tau.
+                mu = si;
+                sigma = sqrt_tau;
+            } else {
+                // Wi == 0: the messenger is set to the data value exactly.
+                mu = di;
+                sigma = 0;
+            }
+
+            m_tmp[i] = mu + sigma * A;
+        }
+} // end of parallel region
+    } else {
+        // Mock mode: serial loop, data = messenger + sqrt(W) * noise where
+        // W > 0, and 0 elsewhere.
+        for (long i = 0; i < localNtot; i++) {
+            double A = rng->get().gaussian();
+            double Wi = W.array->data()[i];
+            double m_i = m_field.data()[i];
+            double& di = data_field.array->data()[i];
+
+            if (Wi > 0)
+                di = m_i + std::sqrt(Wi)*A;
+            else
+                di = 0;
+            Console::instance().c_assert(!std::isnan(di), "Data is a NaN");
+        }
+    }
+}
+
+
+/* t-> s sampler
+ */
+
+// Null every lazily created resource so that initialize() and the
+// destructor can tell what has been allocated. Initializers are listed in
+// member declaration order.
+MessengerSignalSampler::MessengerSignalSampler(MPI_Communication* comm)
+    : analysis_plan(0),
+      synthesis_plan(0),
+      tmp_fourier(0),
+      tmp_fourier_m(0),
+      flat_key(0),
+      tmp_m_field(0),
+      constrainedGeneration(true),
+      comm(comm),
+      tmp_real_field(0),
+      mgr(0)
+{
+}
+
+// Restoring uses exactly the same set-up path as a fresh initialization.
+void MessengerSignalSampler::restore(MarkovState& state)
+{
+    this->initialize(state);
+}
+
+/**
+ * Set up the messenger-signal sampler.
+ *
+ * Reads the grid dimensions, box lengths and number of modes from the
+ * state, creates the FFT manager, allocates and registers "x_field" and
+ * "s_field" (both zero-filled), allocates the Fourier work buffers and
+ * creates the analysis (r2c) and synthesis (c2r) FFTW plans.
+ *
+ * Calling it a second time on the same object is reported through
+ * error_helper<ErrorBadState>.
+ */
+void MessengerSignalSampler::initialize(MarkovState& state)
+{
+    Console& cons = Console::instance();
+    ConsoleContext<LOG_INFO> ctx("Messenger-Signal sampler");
+
+    // Reject a second initialization before anything is modified. The check
+    // used to run after `mgr` had already been re-allocated and the
+    // dimensions overwritten, which leaked the previous FFT manager.
+    if (tmp_fourier) {
+        error_helper<ErrorBadState>("MessengerSignalSampler has already been initialized.");
+    }
+
+    N0 = static_cast<SLong&>(state["N0"]);
+    N1 = static_cast<SLong&>(state["N1"]);
+    N2 = static_cast<SLong&>(state["N2"]);
+
+    mgr = new FFTMgr(N0, N1, N2, comm);
+
+    // This for MPI support
+    startN0 = mgr->startN0;
+    localN0 = mgr->localN0;
+    fourierLocalSize = mgr->allocator_real.minAllocSize;
+
+    N_k = state.get<SLong>("NUM_MODES")->value;
+
+    L0 = static_cast<SDouble&>(state["L0"]);
+    L1 = static_cast<SDouble&>(state["L1"]);
+    L2 = static_cast<SDouble&>(state["L2"]);
+
+    cons.print<LOG_DEBUG>("Allocating x field");
+    x_field = new ArrayType(extents[range(startN0,startN0+localN0)][N1][N2]);
+    x_field->setRealDims(ArrayDimension(N0, N1, N2));
+    cons.print<LOG_DEBUG>("Allocating s field");
+    s_field = new ArrayType(extents[range(startN0,startN0+localN0)][N1][N2]);
+    s_field->setRealDims(ArrayDimension(N0, N1, N2));
+    state.newElement("x_field", x_field);
+    state.newElement("s_field", s_field, true);
+
+    s_field->eigen().fill(0);
+    x_field->eigen().fill(0);
+
+    // Global and per-rank element counts in real and half-complex space.
+    Ntot = N0*N1*N2;
+    Ntot_k = N0*N1*(N2/2+1);
+
+    localNtot = localN0*N1*N2;
+    localNtot_k = localN0*N1*(N2/2+1);
+
+    volume = L0*L1*L2;
+    volNorm = volume/Ntot;
+
+    ctx.print(format("fourierLocalSize = %d") % fourierLocalSize);
+    tmp_fourier = MFCalls::alloc_complex(fourierLocalSize);
+    tmp_fourier_m = MFCalls::alloc_complex(fourierLocalSize);
+
+
+#ifndef ARES_MPI_FFTW
+    // Serial FFTW: plans work in place on x_field; tmp_m_field is the
+    // backup copy used by sample() because FFTW_DESTROY_INPUT is requested.
+    ctx.print("Creating FFTW plans for Messenger-Signal");
+    tmp_m_field = new ArrayType(boost::extents[range(startN0,startN0+localN0)][N1][N2]);
+    ctx.print(format("Allocated tmp_m_field %p") % tmp_m_field->array->origin());
+    analysis_plan = MFCalls::plan_dft_r2c_3d(
+                      N0, N1, N2,
+                      x_field->array->data(),
+                      tmp_fourier,
+                      FFTW_DESTROY_INPUT|FFTW_MEASURE);
+    synthesis_plan = MFCalls::plan_dft_c2r_3d(
+                       N0, N1, N2,
+                       tmp_fourier,
+                       x_field->array->data(),
+                       FFTW_DESTROY_INPUT|FFTW_MEASURE);
+#else
+    // MPI FFTW: plans work on a dedicated real buffer (tmp_real_field).
+    ctx.print("Creating MPI/FFTW plans for Messenger-Signal");
+    tmp_real_field = MFCalls::alloc_real(fourierLocalSize*2);
+    analysis_plan = MFCalls::plan_dft_r2c_3d(
+                      N0, N1, N2,
+                      tmp_real_field,
+                      tmp_fourier,
+                      comm->comm(),
+                     // FFTW_MPI_TRANSPOSED_OUT|
+                      FFTW_DESTROY_INPUT|FFTW_MEASURE);
+    synthesis_plan = MFCalls::plan_dft_c2r_3d(
+                      N0, N1, N2,
+                      tmp_fourier,
+                      tmp_real_field,
+                      comm->comm(),
+                      //FFTW_MPI_TRANSPOSED_IN|
+                      FFTW_DESTROY_INPUT|FFTW_MEASURE);
+#endif
+    ctx.print(format("allocated tmp_fourier(%p) tmp_fourier_m(%p) and tmp_real_field(%p)") % tmp_fourier % tmp_fourier_m% tmp_real_field);
+    ctx.print("Done creating FFTW plans for Messenger-Signal");
+}
+
+
+/**
+ * Release everything created by initialize() and by the lazy set-up in
+ * sample(). Nothing is done when initialize() never ran (tmp_fourier is
+ * still null in that case).
+ */
+MessengerSignalSampler::~MessengerSignalSampler()
+{
+    if (tmp_fourier) {
+        Console::instance().print<LOG_INFO>("Cleaning up Messenger-Signal");
+
+        // tmp_m_field is allocated by initialize() only when ARES_MPI_FFTW
+        // is NOT defined and stays null otherwise. It used to be deleted
+        // under `#ifdef ARES_MPI_FFTW`, i.e. exactly when it was null, so it
+        // leaked in non-MPI builds. delete on null is a no-op, hence no
+        // preprocessor guard is needed at all.
+        delete tmp_m_field;
+
+        if (flat_key)
+            delete flat_key;
+        if (tmp_fourier)
+            MFCalls::free(tmp_fourier);
+        if (tmp_fourier_m)
+            MFCalls::free(tmp_fourier_m);
+        if (tmp_real_field)
+            MFCalls::free(tmp_real_field);
+        if (analysis_plan)
+            MFCalls::destroy_plan(analysis_plan);
+        if (synthesis_plan)
+            MFCalls::destroy_plan(synthesis_plan);
+
+        if (mgr)
+          delete mgr;
+    }
+}
+
+
+/**
+ * One sampling step of the messenger-signal sampler.
+ *
+ * Steps, in order:
+ *  1. fill x_field with Gaussian deviates scaled by 1/sqrt(Ntot);
+ *  2. forward-transform x_field into tmp_fourier and the messenger field
+ *     into tmp_fourier_m;
+ *  3. combine both in Fourier space using weights derived from the
+ *     "powerspectrum" array, tau and the box volume (constrained mode), or
+ *     simply colour the random phases (mock mode);
+ *  4. inverse-transform into x_field and s_field and scale both by 1/volume;
+ *  5. in mock mode only, regenerate the messenger field as
+ *     s + gaussian * sqrt(tau).
+ *
+ * Returns immediately, before drawing any random number, when
+ * "messenger_signal_blocked" is set and the sampler is in constrained mode.
+ */
+void MessengerSignalSampler::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG> ctx("MessengerSignalSampler::sample");
+    RandomGen& rng = static_cast<RandomGen&>(state["random_generator"]);
+    ArrayType& m_field = *state.get<ArrayType>("messenger_field");
+    ArrayType1d::ArrayType& P_info = *state.get<ArrayType1d>("powerspectrum")->array;
+    SDouble& tau = static_cast<SDouble&>(state["messenger_tau"]);
+    // NOTE(review): P_key is not used below; the flat view `flat_key` is used instead.
+    IArrayType::ArrayType& P_key = *state.get<IArrayType>("k_keys")->array; // Built by powerspec_tools
+
+    ArrayType& x = *x_field;
+    ArrayType& s = *s_field;
+    double alpha = 1/std::sqrt(double(Ntot));
+    // NOTE(review): `cons` is not used in this function.
+    Console& cons = Console::instance();
+
+    ctx.print("Sample messenger-signal");
+
+    if (state.get<SBool>("messenger_signal_blocked")->value && constrainedGeneration)
+        return;
+
+    // We have to initialize this lazily. k_keys is created by powerspectrum samplers.
+    if (flat_key == 0) {
+        IArrayType *keys = state.get<IArrayType>("k_keys");
+        // 1d view over the storage of k_keys; it does not copy the data.
+        flat_key = new FlatIntType( keys->array->data(), boost::extents[keys->array->num_elements()] );
+    }
+
+#pragma omp parallel
+{
+    // Generator fetched once per thread inside the parallel region.
+    auto &rng_g = rng.get();
+    const auto &data = x.array->data();
+#pragma omp for schedule(static)
+    for (long i = 0; i < localNtot; i++) {
+        data[i] = rng_g.gaussian()*alpha;
+    }
+}
+    copy_padded_data(*x.array, tmp_real_field, true);
+    // This destroys the x_field. Not a problem: synthesis regenerates it.
+    MFCalls::execute(analysis_plan);
+#ifdef ARES_MPI_FFTW
+    copy_padded_data(*m_field.array, tmp_real_field);
+    MFCalls::execute_r2c(analysis_plan, tmp_real_field, tmp_fourier_m);
+#else
+    // This destroys the m_field, so keep a copy in tmp_m_field first.
+    tmp_m_field->eigen() = m_field.eigen();
+    FCalls::execute_r2c(analysis_plan, m_field.array->data(), tmp_fourier_m);
+#endif
+
+    if (constrainedGeneration) {
+        double scaler = 1/volNorm;
+        double T = tau * volume;
+
+        // Per-mode lookup tables indexed by the key stored in flat_key.
+        boost::multi_array<double, 1> sqrtP(boost::extents[N_k]);
+        boost::multi_array<double, 1> A1(boost::extents[N_k]);
+        boost::multi_array<double, 1> A2(boost::extents[N_k]);
+
+        // sqrtP = sqrt(P * volume), 0 for negative P.
+        LibLSS::copy_array(sqrtP,
+           b_fused<double>(P_info,
+                  [this,scaler](double x)->double const { return x < 0 ? 0 : std::sqrt(x*volume);}
+           )
+        );
+        // A1 = sqrtP / (T + P*volume*scaler), 0 for negative P.
+        LibLSS::copy_array(A1,
+           b_fused<double>(P_info, sqrtP,
+                           [this,scaler,T](double x,double y)->double const { return x < 0 ? 0 :  y/(T+x*volume*scaler); })
+        );
+        // A2 = sqrt(T / (T + P*volume*scaler)), 0 for negative P.
+        LibLSS::copy_array(A2,
+           b_fused<double>(P_info,
+                   [this,scaler,T](double x)->double const { return x < 0 ? 0 : std::sqrt(T/(T+x*volume*scaler)); })
+        );
+
+#pragma omp parallel for schedule(static)
+        for (long i = 0; i < localNtot_k; i++) {
+            long key = (*flat_key)[i];
+            double color_P = sqrtP[key];
+            double aux1 = A1[key];
+            double aux2 = A2[key];
+            // white_phase and colored_phase alias the same element: the
+            // messenger mode is read first, then overwritten with the
+            // coloured result.
+            MFCalls::complex_type& white_phase = tmp_fourier_m[i];
+            MFCalls::complex_type& random_phase = tmp_fourier[i];
+            MFCalls::complex_type& colored_phase = tmp_fourier_m[i];
+
+            random_phase[0] = aux1 * white_phase[0] + aux2 * random_phase[0];
+            random_phase[1] = aux1 * white_phase[1] + aux2 * random_phase[1];
+
+            colored_phase[0] = color_P * random_phase[0];
+            colored_phase[1] = color_P * random_phase[1];
+        }
+        // Zero element 0 of both Fourier buffers on the rank whose slab
+        // starts at plane 0.
+        // NOTE(review): the condition is `localN0 > 1`, so a rank owning
+        // exactly one plane skips this; confirm whether `> 0` was intended.
+        if (startN0 == 0 && localN0 > 1) {
+          tmp_fourier[0][0] = 0;
+          tmp_fourier[0][1] = 0;
+          tmp_fourier_m[0][0] = 0;
+          tmp_fourier_m[0][1] = 0;
+        }
+    } else {
+        // Mock mode: colour the random phases only; the transformed
+        // messenger field in tmp_fourier_m is overwritten.
+#pragma omp parallel for schedule(static)
+        for (long i = 0; i < localNtot_k; i++) {
+            double P = P_info[(*flat_key)[i]] * volume;
+            double color_P = std::sqrt(P);
+            // NOTE(review): white_phase is not read in this branch.
+            MFCalls::complex_type& white_phase = tmp_fourier_m[i];
+            MFCalls::complex_type& random_phase = tmp_fourier[i];
+            MFCalls::complex_type& colored_phase = tmp_fourier_m[i];
+
+            colored_phase[0] = color_P * random_phase[0];
+            colored_phase[1] = color_P * random_phase[1];
+        }
+    }
+
+    ctx.print("Fourier synthesis of phases");
+    // Regenerate a correct x_field
+    MFCalls::execute(synthesis_plan);
+    copy_unpadded_data(tmp_real_field, *x.array, true);
+
+    ctx.print("Fourier synthesis of signal");
+    // Generate the colored s field
+#ifdef ARES_MPI_FFTW
+    MFCalls::execute_c2r(synthesis_plan, tmp_fourier_m, tmp_real_field);
+    copy_unpadded_data(tmp_real_field, *s.array);
+#else
+    FCalls::execute_c2r(synthesis_plan, tmp_fourier_m, s.array->data());
+    if (constrainedGeneration) {
+        // Restore m_field from the copy taken before the forward transform
+        m_field.eigen() = tmp_m_field->eigen();
+    }
+#endif
+
+    // Just renormalize
+    array::scaleArray3d(*s.array, 1.0/volume);
+    array::scaleArray3d(*x.array, 1.0/volume);
+
+    // Generate m_field in mock mode
+    if (!constrainedGeneration) {
+        double sq_tau = sqrt(tau);
+
+        // Populate m_field (serial loop, one deviate per local cell)
+        for (long i = 0; i < localNtot; i++)
+            m_field.array->data()[i] = s.array->data()[i] + rng.get().gaussian()*sq_tau;
+    }
+
+}
+
+
+/*
+ * (catalog,meta) -> data
+ */
+
+
+// Cache the number of catalogs stored under "NCAT" in the state.
+void CatalogProjectorSampler::initialize(MarkovState& state)
+{
+    SLong& ncat_element = static_cast<SLong&>(state["NCAT"]);
+    Ncat = ncat_element;
+}
+
+// On restore, re-read the number of catalogs stored under "NCAT".
+void CatalogProjectorSampler::restore(MarkovState& state)
+{
+    SLong& ncat_element = static_cast<SLong&>(state["NCAT"]);
+    Ncat = ncat_element;
+}
+
+/**
+ * Rebuild the messenger mask (and, outside mock mode, the projected data
+ * field) from all galaxy catalogs, and update "messenger_tau".
+ *
+ * Steps, in order:
+ *  1. accumulate over catalogs
+ *       W    += sel * nmean * bias^2 * growth^2
+ *       data += (galaxy_data - nmean * sel) * bias * growth   (not in mock mode)
+ *     and divide W by "ares_heat";
+ *  2. invert W where it is positive (0 elsewhere) while tracking its
+ *     largest positive value; tau is the inverse of that maximum, reduced
+ *     with MPI_MAX over all ranks;
+ *  3. outside mock mode multiply data by the inverted W; in mock mode
+ *     generate synthetic galaxy data for every catalog from s_field;
+ *  4. finally store max(0, W - tau) where W is positive and -1 elsewhere.
+ */
+void CatalogProjectorSampler::sample(MarkovState& state)
+{
+    RandomGen& rng = static_cast<RandomGen&>(state["random_generator"]);
+    ArrayType& W = *state.get<ArrayType>("messenger_mask");
+    SDouble *messenger_tau = state.get<SDouble>("messenger_tau");
+    ArrayType& G = *state.get<ArrayType>("growth_factor");
+    ArrayType& data_field = *state.get<ArrayType>("data_field");
+    // Just do vectorized operation here
+    MappedArray map_W = W.eigen();
+    MappedArray growth = G.eigen();
+    MappedArray map_data = data_field.eigen();
+    ConsoleContext<LOG_DEBUG> ctx("regenerate_W");
+    double heat = state.getScalar<double>("ares_heat");
+
+    ctx.print("Rebuild the projected data and covariance matrix");
+    // Clear up W first
+    map_W.fill(0);
+    if (!mockGeneration)
+        map_data.fill(0);
+    for (int c = 0; c < Ncat; c++) {
+        ctx.print(format("Looking at catalog %d") % c);
+
+        SelArrayType& sel_field = *state.get<SelArrayType>(format("galaxy_synthetic_sel_window_%d") % c);
+        ArrayType& g_field = *state.get<ArrayType>(format("galaxy_data_%d") % c);
+        double& bias = extract_bias(state, c);
+        double nmean = state.get<SDouble>(format("galaxy_nmean_%d") % c)->value;
+        MappedArray g_data = g_field.eigen();
+        MappedArray map_sel = sel_field.eigen();
+
+        if (!mockGeneration)
+            map_data += (g_data - nmean * map_sel) * bias * growth;
+
+        map_W += map_sel * nmean * bias*bias * growth * growth;
+    }
+    map_W /= heat;
+
+    ctx.print("Finish weights");
+
+    // Hmm... I cannot use the vectorized instruction here as it depends on the positivity of map_W[i]. Just do a loop
+    double tau_inverse = 0; // This is the inverse of minimum covariance
+
+    // tau_inverse is a running maximum shared by all iterations. Without the
+    // reduction clause the compare-then-store below is a data race between
+    // threads and the largest value can be lost. reduction(max:...) gives
+    // each thread a private copy and combines them, together with the
+    // initial 0, once the loop is finished.
+#pragma omp parallel for schedule(static) reduction(max:tau_inverse)
+    for (long n = 0; n < map_W.size(); n++) {
+        double& val = map_W[n];
+
+        if (val > 0) {
+            if (val > tau_inverse)
+                tau_inverse = val;
+            val = 1/val;
+        } else
+            val = 0;
+    }
+    ctx.print(format("Got partial_tau = %lg") % (1/tau_inverse));
+
+    // Global maximum over all ranks, so every rank ends up with the same tau.
+    comm->all_reduce(MPI_IN_PLACE, &tau_inverse, 1, translateMPIType<double>(), MPI_MAX);
+    double tau = 1/tau_inverse;
+
+    messenger_tau->value = tau;
+
+    if (!mockGeneration)
+        map_data *= map_W;
+    else {
+        for (int c = 0; c < Ncat; c++) {
+            SelArrayType& sel_field = *state.get<SelArrayType>(format("galaxy_synthetic_sel_window_%d") % c);
+            double& bias = extract_bias(state, c);
+            double nmean = state.get<SDouble>(format("galaxy_nmean_%d") % c)->value;
+            MappedArray map_sel = sel_field.eigen();
+            ArrayType& s_field = *state.get<ArrayType>("s_field");
+            ArrayType& g_field = *state.get<ArrayType>(format("galaxy_data_%d") % c);
+            MappedArray s_data = s_field.eigen();
+            MappedArray g_data = g_field.eigen();
+            Eigen::ArrayXd err(map_sel.size());
+
+            ctx.print(format("Catalog %d: Generate mock data with nmean = %lg, bias = %lg")  % c % nmean % bias);
+
+            // Per-cell noise variance of the mock: selection * nmean.
+            err = map_sel * nmean;
+
+            g_data = err*(1+bias*growth*s_data);
+
+#pragma omp parallel for schedule(static)
+            for (long i = 0; i < err.size(); i++) {
+                double E = err[i];
+                if (E > 0) {
+                    g_data[i] += rng.get().gaussian() * sqrt(E);
+                } else {
+                    g_data[i] = 0;
+                }
+            }
+        }
+    }
+
+    // Final mask: positive cells keep their excess over tau (clamped at 0),
+    // all other cells are flagged with -1.
+#pragma omp parallel for schedule(static)
+    for (long n = 0; n < map_W.size(); n++) {
+        if (map_W[n] > 0)
+            map_W[n] = std::max(double(0), map_W[n] - tau);
+        else
+            map_W[n] = -1;
+    }
+    ctx.print(format("Got tau = %lg") % tau );
+}
--- a/libLSS/samplers/ares/gibbs_messenger.hpp
+++ b/libLSS/samplers/ares/gibbs_messenger.hpp
@ -0,0 +1,102 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/gibbs_messenger.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_GIBBS_MESSENGER_HPP
+#define __LIBLSS_GIBBS_MESSENGER_HPP
+
+#include <CosmoTool/fourier/fft/fftw_calls.hpp>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include "libLSS/tools/fftw_allocator.hpp"
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/random_number.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+
+namespace LibLSS {
+
+    namespace GibbsMessenger {
+
+        namespace details {
+
+            typedef FFTW_Manager_3d<double> FFTMgr;
+
+            /**
+             * Markov sampler for the messenger field: draws the messenger
+             * field from the data and signal fields or, in mock mode,
+             * generates the data field from the messenger field.
+             */
+            class MessengerSampler: public MarkovSampler {
+            protected:
+                // Global grid dimensions, their product and the number of modes.
+                long N0;
+                long N1;
+                long N2;
+                long Ntot;
+                long N_k;
+                // Local slab along the first axis and its element count.
+                long localN0;
+                long startN0;
+                long localNtot;
+                // Arrays registered in the state by initialize().
+                ArrayType *messenger_mask;
+                ArrayType *data_field;
+                SDouble *messenger_tau;
+                RandomGen *rng;
+                // true: constrained sampling, false: mock generation.
+                bool constrainedGeneration;
+                MPI_Communication *comm;
+                // Created by initialize(), deleted by the destructor.
+                FFTMgr *mgr;
+            public:
+                MessengerSampler(MPI_Communication *comm);
+                virtual ~MessengerSampler();
+
+                virtual void restore(MarkovState& state);
+                virtual void initialize(MarkovState& state);
+                virtual void sample(MarkovState& state);
+
+                // Passing true switches the sampler to mock generation.
+                void setMockGeneration(bool b) { constrainedGeneration = !b; }
+
+            };
+
+            /**
+             * Markov sampler for the signal field given the messenger
+             * field; works in Fourier space through FFTW plans.
+             */
+            class MessengerSignalSampler: public MarkovSampler {
+            protected:
+                // Flat (1d) view over the mode-key array.
+                using FlatIntType = boost::multi_array_ref< IArrayType::ArrayType::element, 1>;
+
+                long fourierLocalSize;
+                // Forward (r2c) and backward (c2r) plans.
+                FCalls::plan_type analysis_plan;
+                FCalls::plan_type synthesis_plan;
+                // Fourier work buffers for the random phases and the messenger.
+                FCalls::complex_type *tmp_fourier;
+                FCalls::complex_type *tmp_fourier_m;
+                // Created lazily by sample().
+                FlatIntType *flat_key;
+                double volNorm;
+                // Global sizes in real and half-complex space, number of modes.
+                long N0;
+                long N1;
+                long N2;
+                long Ntot;
+                long Ntot_k;
+                long N_k;
+                // Local slab and its element counts.
+                long startN0;
+                long localN0;
+                long localNtot;
+                long localNtot_k;
+                // Box lengths and their product.
+                double L0;
+                double L1;
+                double L2;
+                double volume;
+                ArrayType *tmp_m_field;
+                ArrayType *x_field;
+                ArrayType *s_field;
+                // true: constrained sampling, false: mock generation.
+                bool constrainedGeneration;
+                MPI_Communication *comm;
+                FCalls::real_type *tmp_real_field;
+                FFTMgr *mgr;
+            public:
+                MessengerSignalSampler(MPI_Communication* comm);
+                virtual ~MessengerSignalSampler();
+
+                virtual void restore(MarkovState& state);
+                virtual void initialize(MarkovState& state);
+                virtual void sample(MarkovState& state);
+
+                // Passing true switches the sampler to mock generation.
+                void setMockGeneration(bool b) { constrainedGeneration = !b; }
+
+            };
+
+            /**
+             * Markov sampler that projects the galaxy catalogs onto the
+             * data field and messenger mask, or generates mock catalogs.
+             */
+            class CatalogProjectorSampler: public MarkovSampler {
+            protected:
+                // Number of catalogs, read from the state by initialize()/restore().
+                int Ncat;
+                MPI_Communication *comm;
+                // true: generate mock galaxy data instead of projecting it.
+                bool mockGeneration;
+            public:
+                CatalogProjectorSampler(MPI_Communication *comm0)
+                    : comm(comm0), mockGeneration(false)
+                {}
+
+                virtual void restore(MarkovState& state);
+                virtual void initialize(MarkovState& state);
+                virtual void sample(MarkovState& state);
+
+                void setMockGeneration(bool b) { mockGeneration = b; }
+            };
+        }
+    }
+
+    using GibbsMessenger::details::MessengerSampler;
+    using GibbsMessenger::details::MessengerSignalSampler;
+    using GibbsMessenger::details::CatalogProjectorSampler;
+}
+
+#endif
--- a/libLSS/samplers/ares/linbias_sampler.cpp
+++ b/libLSS/samplers/ares/linbias_sampler.cpp
@ -0,0 +1,266 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/linbias_sampler.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <boost/format.hpp>
+#include <functional>
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/samplers/core/gig_sampler.hpp"
+#include "libLSS/samplers/ares/linbias_sampler.hpp"
+#include "libLSS/samplers/rgen/slice_sweep.hpp"
+#include "libLSS/samplers/ares/ares_bias.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "boost/lambda/lambda.hpp"
+
+using namespace LibLSS;
+using boost::format;
+using LibLSS::ARES::extract_bias;
+using LibLSS::ARES::ensure_bias_size;
+namespace ph = std::placeholders;
+
+/**
+ * Read the catalog count and grid dimensions from the state, derive the
+ * global and local cell counts, and make sure every "galaxy_bias_<c>"
+ * array holds at least one entry (new entries start at 1).
+ */
+void LinearBiasSampler::initialize(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG> ctx("initialization of LinearBiasSampler");
+
+    // This sampler depends heavily on the rest of the model: everything it
+    // needs is looked up in the markov state, starting with the number of
+    // catalogs.
+    Ncat = static_cast<SLong&>(state["NCAT"]);
+
+    const long N0 = static_cast<SLong&>(state["N0"]);
+    const long localN0 = static_cast<SLong&>(state["localN0"]);
+    // Looked up like the others although the value is not used afterwards.
+    const long startN0 = static_cast<SLong&>(state["startN0"]);
+    (void)startN0;
+    const long N1 = static_cast<SLong&>(state["N1"]);
+    const long N2 = static_cast<SLong&>(state["N2"]);
+
+    Ntot = N0*N1*N2;
+    localNtot = localN0*N1*N2;
+
+    // Ensure that the bias is at least size 1
+    const auto default_bias = boost::array<double,1>({1});
+    for (unsigned int c = 0; c < Ncat; c++) {
+      ensure_bias_size(state, c, default_bias);
+    }
+}
+
+// Restoring opens its own log context and then runs the normal initialization.
+void LinearBiasSampler::restore(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG> ctx("restoration of LinearBiasSampler");
+    this->initialize(state);
+}
+
+
+// Log-density used for slice-sampling the bias: a Gaussian in b with the
+// given mean and deviation, scaled by `heat`, and -infinity for b < 0.
+static inline double logPosteriorBias(double b, double mean, double dev, double heat)
+{
+  if (!(b < 0)) {
+    const double z = (b - mean) / dev;
+    return -0.5 * z * z * heat;
+  }
+
+  // Negative bias values are excluded.
+  return -std::numeric_limits<double>::infinity();
+}
+
+/**
+ * One sampling step for the mean density and linear bias of every catalog.
+ *
+ * Steps, in order:
+ *  1. per catalog, reduce five sums over the local cells with positive
+ *     selection (alpha, beta, chi, psi, pixel count) and sum them over all
+ *     ranks;
+ *  2. per catalog, on rank 0, draw nmean with GIG_sampler_3params and,
+ *     unless the catalog is flagged "galaxy_bias_ref_<c>", draw the bias by
+ *     slice sampling; both values are then broadcast from rank 0;
+ *  3. unless all three "power_sampler_{a,b,c}_blocked" flags are set,
+ *     perform a joint Metropolis step per non-reference catalog on rank 0
+ *     and rescale "s_field" by `factor` and "powerspectrum" by factor^2.
+ *
+ * Returns immediately when "bias_sampler_blocked" is set.
+ * Every broadcast below is a collective call: root and non-root ranks must
+ * issue them in the same order.
+ */
+void LinearBiasSampler::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG>  ctx("sampling of mean and bias");
+    // NOTE(review): data_field and W are looked up but not used below.
+    ArrayType& data_field = *state.get<ArrayType>("data_field");
+    ArrayType& W = *state.get<ArrayType>("messenger_mask");
+    double *G = state.get<ArrayType>("growth_factor")->array->data();
+    double *s_field = state.get<ArrayType>("s_field")->array->data();
+    RandomGen *rng = state.get<RandomGen>("random_generator");
+    double heat = state.getScalar<double>("ares_heat");
+    using boost::extents;
+    using CosmoTool::square;
+
+    if (state.get<SBool>("bias_sampler_blocked")->value)
+        return;
+
+    // One slot per catalog for each reduced quantity.
+    auto ext_Ncat = extents[Ncat];
+    boost::multi_array<double, 1>
+      alphas(ext_Ncat), betas(ext_Ncat),
+      chis(ext_Ncat), psis(ext_Ncat), Npixs(ext_Ncat);
+
+    // ---------------------------------------------------------
+    // Time consuming part, do data reduction per sub-catalog
+    // All five sums (alpha, beta, chi, psi, Npix) are accumulated here.
+    for (int c = 0; c < Ncat; c++) {
+        SelArrayType& sel_field = *state.get<SelArrayType>(format("galaxy_synthetic_sel_window_%d") % c);
+        double *g_field = state.get<ArrayType>(format("galaxy_data_%d") % c)->array->data();
+        double& bias = extract_bias(state, c);
+        SDouble *g_nmean = state.get<SDouble>(format("galaxy_nmean_%d") % c);
+        double nmean = g_nmean->value;
+
+        const auto &sel_array = sel_field.array->data();
+        // NOTE(review): the trailing `alpha` and `beta` locals are not used.
+        double loc_alpha = 0, loc_beta = 0, loc_psi = 0, loc_chi = 0, loc_Npix = 0, alpha = 0, beta = 0;
+
+#pragma omp parallel for schedule(dynamic, 1024) reduction(+:loc_alpha,loc_beta,loc_chi,loc_psi,loc_Npix)
+        for (long i = 0; i < localNtot; i++) {
+            double selection = sel_array[i];
+            // Only cells with positive selection contribute.
+            if (selection > 0) {
+                double Nobs = g_field[i];
+                double Dplus = G[i];
+                double density = s_field[i];
+                double aux_gamma = 1 + bias * Dplus * density;
+
+                loc_beta += selection * nmean * Dplus * Dplus * density * density;
+                loc_alpha += (Nobs - selection*nmean) * Dplus * density;
+                loc_chi += Nobs*Nobs/selection;
+                loc_psi += selection * aux_gamma * aux_gamma;
+                loc_Npix++;
+            }
+        }
+
+        // Store the partial result and continue
+        alphas[c] = loc_alpha;
+        betas[c] = loc_beta;
+        chis[c] = loc_chi;
+        psis[c] = loc_psi;
+        Npixs[c] = loc_Npix;
+    }
+
+    // Final reduction
+    ctx.print("Reducing result");
+    comm->all_reduce_t(MPI_IN_PLACE, alphas.data(), Ncat, MPI_SUM);
+    comm->all_reduce_t(MPI_IN_PLACE, betas.data(), Ncat, MPI_SUM);
+    comm->all_reduce_t(MPI_IN_PLACE, chis.data(), Ncat, MPI_SUM);
+    comm->all_reduce_t(MPI_IN_PLACE, psis.data(), Ncat, MPI_SUM);
+    comm->all_reduce_t(MPI_IN_PLACE, Npixs.data(), Ncat, MPI_SUM);
+    ctx.print("Done");
+
+    for (int c = 0; c < Ncat; c++) {
+        // References into the state: assignments below update it directly.
+        double& bias = extract_bias(state, c);
+        double& nmean = state.get<SDouble>(format("galaxy_nmean_%d") % c)->value;
+
+        double alpha = alphas[c], beta = betas[c];
+        bool biasRef = state.get<SBool>(format("galaxy_bias_ref_%d") % c )->value;
+
+        // Only rank 0 draws; the result is broadcast further down.
+        if (comm->rank() == 0 ) {// || comm->size() == 1 ) {    // Use another node */
+          double lambda = 1 - 0.5*Npixs[c];
+
+          nmean = GIG_sampler_3params(heat*psis[c],heat*chis[c],lambda,
+                                               rng->get());
+
+          ctx.print(format("Npix = %d, chi = %lg, psi = %lg") % Npixs[c] % chis[c] % psis[c]);
+          ctx.print(format("Broadcast value -> nmean = %lg") % nmean);
+        }
+
+        if (!biasRef && comm->rank() == 0) {
+            // Gaussian conditional for the bias: mean alpha/beta, deviation 1/sqrt(beta).
+            double mean_bias = alpha/beta;
+            double dev_bias = sqrt(1/beta);
+
+            Console::instance().c_assert(!std::isinf(mean_bias) && !std::isnan(mean_bias), "Mean is NaN or infinite");
+            ctx.print(format("bias = %lg, mean_bias = %lg, dev_bias = %lg") % bias % mean_bias % dev_bias);
+            bias = slice_sweep(rng->get(), std::bind(logPosteriorBias, ph::_1, mean_bias, dev_bias, heat), bias, dev_bias);
+
+            Console::instance().c_assert(bias > 0, "Negative bias (0). Ouch!");
+        }
+
+        ctx.print("Sync bias");
+        // Synchronize all nodes with the new bias value
+        comm->broadcast_t(&bias, 1, 0);
+
+        ctx.print("Sync nmean");
+        // Synchronize all nodes with the new mean value
+        comm->broadcast_t(&nmean, 1, 0 );
+
+    }
+
+
+    ///now improve sampling efficiency by performing a joint step in s,P(k) and biases
+    ///NOTE: the following algorithm MUST be executed in sequence
+    ///get RNG
+
+    //only update if power-spectrum is sampled
+    if (state.getScalar<bool>("power_sampler_a_blocked") && 
+        state.getScalar<bool>("power_sampler_b_blocked") && 
+        state.getScalar<bool>("power_sampler_c_blocked"))
+        return;
+
+    // Same state element ("random_generator") as `rng` above.
+    RandomGen *rgen = state.get<RandomGen>("random_generator");
+    // Cumulative rescaling applied to the density field at the end.
+    double factor = 1.;
+
+    if (comm->rank() == 0) {
+        for (int c = 0; c < Ncat; c++) {
+            bool biasRef = state.get<SBool>(format("galaxy_bias_ref_%d") % c )->value;
+
+            //Don't sample the reference bias
+            // (this also skips the broadcast, matching the non-root branch)
+            if (biasRef)
+              continue;
+
+            //1) draw random bias realization (b1) for the current catalog
+            double mean_bias = alphas[c]/betas[c];
+            double dev_bias = sqrt(1./betas[c]);
+            double& b0 = extract_bias(state, c);
+            double b1=b0;
+
+            ctx.print(boost::format("Slice sweeping[%d]: mean_bias = %lg, dev_bias = %lg") % c % mean_bias % dev_bias);
+            b1 = slice_sweep(rng->get(), std::bind(logPosteriorBias, ph::_1, mean_bias, dev_bias, heat), b1, dev_bias);
+
+            // Ratio by which the density would be rescaled if b1 is accepted.
+            double fact_virt = b0/b1;
+
+            //Now calculate hastings value for the all catalogs but the current one (this sum can be done in parallel)
+            double dH=0.;
+            for (int cc = 0; cc < Ncat; cc++) {
+
+                if(c!=cc) {
+                    double bb = extract_bias(state, cc);
+
+                    //Note that we need to operate with the updated density field
+                    //we calculate the metropolis factor of remaining likelihoods with respect to jumps in bias and density field
+                    dH +=       2 * (1-fact_virt) * alphas[cc] * factor * bb -
+                          (1-fact_virt*fact_virt) * betas[cc]*square(factor*bb);
+              }
+            }
+
+            dH *= 0.5*heat;
+
+            //now do Metropolis step
+            double log_u = log(rgen->get().uniform());
+            if (log_u <= -dH) {
+                //update accepted bias
+                b0 = b1;
+                //also update the density factor
+                //this accounts for updating the density and power-spectrum fields deterministically
+                factor *= fact_virt;
+
+    //            ctx.print(format("Sample accepted for catalog nr. %lg! New bias = %lg , New density factor = %lg") %c % b0 % factor);
+            }
+            //if sample is rejected then simply continue
+            comm->broadcast_t(&b0, 1, 0);
+        }
+
+    } else {
+
+      // We are not root, just gather the biases as they are updated
+      for (int c = 0; c < Ncat; c++) {
+        bool biasRef = state.get<SBool>(format("galaxy_bias_ref_%d") % c )->value;
+
+        //Don't sample the reference bias
+        if (!biasRef) {
+          double& b0 = extract_bias(state, c);
+
+          // Update from Root rank the value of bias
+          comm->broadcast_t(&b0, 1, 0);
+        }
+      }
+
+    }
+
+    // Broadcast and gather the scaling factor
+    comm->broadcast_t(&factor, 1, 0);
+
+    //Finally we just need to rescale the density and power-spectrum fields by "factor"
+
+    //1) scale density field in real and Fourier space
+    array::scaleArray3d(*state.get<ArrayType>("s_field")->array, factor);
+
+    //2) scale power-spectrum (by factor squared)
+    ArrayType1d::ArrayType& P_info = *state.get<ArrayType1d>("powerspectrum")->array;
+    LibLSS::copy_array(P_info, b_fused<double>(P_info, (factor*factor)*boost::lambda::_1));
+}
--- a/libLSS/samplers/ares/linbias_sampler.hpp
+++ b/libLSS/samplers/ares/linbias_sampler.hpp
@ -0,0 +1,36 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/linbias_sampler.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_LINEAR_BIAS_SAMPLER_HPP
+#define __LIBLSS_LINEAR_BIAS_SAMPLER_HPP
+
+#include <boost/multi_array.hpp>
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Markov sampler dedicated to the linear bias parameters.
+     *
+     * Only the interface is declared here; initialize(), restore() and
+     * sample() are defined out of line.
+     */
+    class LinearBiasSampler: public MarkovSampler {
+    protected:
+        // Not set by the constructor; presumably filled in by
+        // initialize()/restore() (number of catalogs) -- TODO confirm.
+        int Ncat;
+        // Not set by the constructor either; by their names these look like
+        // global and rank-local cell counts -- verify against the .cpp.
+        long Ntot, localNtot;
+        // One-dimensional array of pointers to SDouble state elements.
+        boost::multi_array<SDouble *, 1> biases;
+        // Communicator handed to the constructor; stored as a raw pointer,
+        // so it must outlive this sampler.
+        MPI_Communication *comm;
+    public:
+        // Stores the communicator; every other member is left untouched.
+        LinearBiasSampler(MPI_Communication *comm0) : comm(comm0) {}
+        virtual ~LinearBiasSampler() {}
+        
+        // MarkovSampler-style entry points, implemented out of line.
+        virtual void initialize(MarkovState& state);
+        virtual void restore(MarkovState& state);
+        virtual void sample(MarkovState& state);
+    };
+    
+}
+
+#endif
--- a/libLSS/samplers/ares/powerspectrum_a_sampler.cpp
+++ b/libLSS/samplers/ares/powerspectrum_a_sampler.cpp
@ -0,0 +1,142 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_a_sampler.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/ares/powerspectrum_a_sampler.hpp"
+#include "libLSS/mcmc/state_element.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+
+using namespace LibLSS;
+
+void PowerSpectrumSampler_a::base_init()
+{
+    // Set-up shared by initialize() and restore(): allocate the FFT work
+    // buffers, build the real-to-complex plan and wrap the key array in a
+    // flat one-dimensional view.
+    ConsoleContext<LOG_DEBUG> ctx("base_init");
+
+    ctx.print(boost::format("Allocating Fourier buffer %dx%dx%d") % N0 % N1 % N2_HC);
+
+    // Complex output buffer and (padded) real input buffer of the transform.
+    tmp_fourier = MFCalls::alloc_complex(fourierLocalSize);
+    tmp_s = MFCalls::alloc_real(2*fourierLocalSize);
+    assert(tmp_fourier != nullptr);
+
+    ctx.print(boost::format("Fourier buffer %p") % tmp_fourier);
+    ctx.print(boost::format("Allocating plan %dx%dx%d") % N0 % N1 % N2);
+
+    // The communicator argument only exists in the MPI flavour of the call.
+    analysis_plan = MFCalls::plan_dft_r2c_3d(
+        N0, N1, N2,
+        tmp_s,
+        (FCalls::complex_type *)tmp_fourier,
+#ifdef ARES_MPI_FFTW
+        comm->comm(),
+#endif
+        //FFTW_MPI_TRANSPOSED_OUT|
+        FFTW_DESTROY_INPUT|FFTW_MEASURE);
+
+    // Flat view over the keys: one entry per element of the key array.
+    const auto key_count = keys->array->num_elements();
+    flat_keys = new FlatIntType(keys->array->data(), boost::extents[key_count]);
+}
+
+void PowerSpectrumSampler_a::restore(MarkovState& state)
+{
+    // Restart path: let the base class restore its part of the state, then
+    // rebuild the FFT buffers and plan.
+    ConsoleContext<LOG_INFO> ctx("restoration of power spectrum sampler (a)");
+
+    restore_base(state);
+    base_init();
+}
+
+void PowerSpectrumSampler_a::initialize(MarkovState& state)
+{
+    // Fresh-start path: let the base class initialize its part of the state,
+    // then build the FFT buffers and plan.
+    ConsoleContext<LOG_INFO> ctx("initialization of power spectrum sampler (a)");
+
+    initialize_base(state);
+    base_init();
+}
+
+// Construction only records the communicator (through the base class) and
+// nulls the lazily allocated resources; the real set-up is in base_init().
+PowerSpectrumSampler_a::PowerSpectrumSampler_a(MPI_Communication *comm0)
+    : PowerSpectrumSampler_Base(comm0),
+      tmp_fourier(nullptr),
+      flat_keys(nullptr),
+      tmp_s(nullptr)
+{
+}
+
+
+// Releases what base_init() allocated. A non-null tmp_fourier is used as the
+// marker that the plan and the flat key view exist as well.
+PowerSpectrumSampler_a::~PowerSpectrumSampler_a()
+{
+    if (tmp_fourier != nullptr) {
+        Console::instance().print<LOG_INFO>("Cleaning up Powerspectrum sampler (a)");
+
+        MFCalls::free(tmp_fourier);
+        MFCalls::destroy_plan(analysis_plan);
+        delete flat_keys;
+    }
+
+    if (tmp_s != nullptr) {
+        MFCalls::free(tmp_s);
+    }
+}
+
+// Draw a new power spectrum given the current "s_field".
+//
+// Steps, as implemented below:
+//   1. Fourier-transform s_field with the plan built in base_init().
+//   2. Accumulate adjust[i] * |s_hat_i|^2 into the bin given by flat_keys.
+//   3. Sum the per-bin totals over all MPI ranks.
+//   4. On rank 0 only, divide each non-empty bin by a chi-square draw and
+//      rescale by volNorm / Ntot.
+//   5. Broadcast the result to every rank.
+// Nothing is done when the state flag "power_sampler_a_blocked" is set.
+void PowerSpectrumSampler_a::sample(MarkovState& state)
+{
+    // Grab the signal field ("s_field") from the state
+    ConsoleContext<LOG_DEBUG> ctx("PowerSpectrumSampler_a::sample");
+    Console& cons = Console::instance();
+    ArrayType& s_field = static_cast<ArrayType&>(state["s_field"]);
+
+    //return;
+    IArrayType1d::ArrayType& nmode_array = *nmode->array;
+    ArrayType1d::ArrayType& P_array = *P->array;
+
+    
+    // Sampler disabled through the state: leave P untouched.
+    if (state.get<SBool>("power_sampler_a_blocked")->value)
+        return;
+    
+    // Copy the field into the padded real buffer and transform it; the plan
+    // was created with tmp_s as input and tmp_fourier as output.
+    copy_padded_data(*s_field.array, tmp_s);
+    MFCalls::execute(analysis_plan);
+
+    ctx.print("Compute inverse-gamma parameter");
+
+    // P_array is reused as the accumulator of the per-bin power.
+    std::fill(P_array.begin(), P_array.end(), 0);
+    
+    ctx.print(boost::format("N_fourier_elements = %d") % N_fourier_elements);
+    int *adjust = adjustMul->array->data();
+//#pragma omp parallel for schedule(static)
+    for (long i = 0; i < local_fourier_elements; i++) {    
+        FCalls::complex_type& m_hat = tmp_fourier[i];
+        // Squared modulus of the Fourier coefficient
+        double Pelt = m_hat[0]*m_hat[0] + m_hat[1]*m_hat[1];
+        
+        // Using "adjust" increases memory bandwidth consumption. Not great...
+        // OTOH it is very convenient and this loop is not the most time-consuming part.
+        P_array[ (*flat_keys)[i] ] += adjust[i] * Pelt;
+    }
+    // Presumably P_sync wraps P_array, so this sums the local accumulations
+    // over all ranks -- TODO confirm where P_sync is defined.
+    P_sync.mpiAllSum(*comm);
+
+    ctx.print("Sample new power spectrum");
+    
+    const int alpha=1; ///Jeffreys prior
+
+    // Only compute random numbers on rank==0, broadcast after
+    if (comm->rank() == 0) {
+#pragma omp parallel for schedule(static)
+        for(long l = 0; l < N_k; l++) {
+            // Bins without any mode keep their accumulated value.
+            if(nmode_array[l] > 0) {
+                // Degrees of freedom; with alpha == 1 this is just nmode_array[l].
+                int beta = (2*alpha-2) + nmode_array[l];
+
+                /// Generate a chi-square sample as a sum of beta squared Gaussian draws
+                double z2 = 0.;
+                for(int j = 0; j < beta; j++) {
+                    double aux=rgen->get().gaussian(); 
+
+                    z2 += aux*aux;
+                }
+                /// Calculate the power-spectrum sample
+                P_array[l] = (P_array[l]/z2) * volNorm / Ntot;
+                
+            }
+        }
+    }
+    
+    // Make the rank-0 sample visible on every rank.
+    P_sync.mpiBroadcast(*comm);
+}
+
--- a/libLSS/samplers/ares/powerspectrum_a_sampler.hpp
+++ b/libLSS/samplers/ares/powerspectrum_a_sampler.hpp
@ -0,0 +1,41 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_a_sampler.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_POWERSPECTRUM_A_SAMPLER_HPP
+#define __LIBLSS_POWERSPECTRUM_A_SAMPLER_HPP
+
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Power-spectrum sampler "a", built on PowerSpectrumSampler_Base.
+     *
+     * The members below are working resources; they are nulled by the
+     * constructor, allocated by base_init() and released by the destructor.
+     */
+    class PowerSpectrumSampler_a: public PowerSpectrumSampler_Base {
+    protected:
+        // One-dimensional, non-owning view over integer array storage.
+        typedef boost::multi_array_ref< IArrayType::ArrayType::element, 1> FlatIntType;
+
+        // Complex output buffer of the real-to-complex transform.
+        FCalls::complex_type *tmp_fourier;
+        // FFT plan; not initialized by the constructor, only by base_init().
+        FCalls::plan_type analysis_plan;
+        // Flat view over the key array, heap-allocated in base_init().
+        FlatIntType *flat_keys;
+        // Real input buffer of the transform.
+        MFCalls::real_type *tmp_s;
+        
+        // Common set-up called from both initialize() and restore().
+        void base_init();
+    public:
+        PowerSpectrumSampler_a(MPI_Communication *comm);
+        virtual ~PowerSpectrumSampler_a();
+
+        virtual void restore(MarkovState& state);
+        virtual void initialize(MarkovState& state);
+        virtual void sample(MarkovState& state);    
+    };
+
+}
+
+#endif
--- a/libLSS/samplers/ares/powerspectrum_b_sampler.cpp
+++ b/libLSS/samplers/ares/powerspectrum_b_sampler.cpp
@ -0,0 +1,217 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_b_sampler.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <CosmoTool/algo.hpp>
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/mcmc/state_element.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+#include "libLSS/samplers/ares/powerspectrum_b_sampler.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+
+using boost::format;
+using namespace LibLSS;
+
+// Records the communicator (through the base class) and puts every lazily
+// allocated resource into a well-defined empty state. The real set-up is
+// done by base_init().
+//
+// The initializers follow the member declaration order of the class, which
+// is the order C++ applies them in anyway, and tmp_fourier_t is now nulled
+// as well so that no pointer member starts with an indeterminate value.
+// analysis_plan is still only assigned in base_init().
+PowerSpectrumSampler_b::PowerSpectrumSampler_b(MPI_Communication *comm0)
+    : PowerSpectrumSampler_Coloring(comm0),
+      tmp_fourier(0), tmp_fourier_t(0),
+      tmp_x(0), tmp_t(0),
+      flat_keys(0),
+      total_accepted(0), total_tried(0),
+      P0_array(boost::extents[0]), P1_array(boost::extents[0])
+{
+}
+
+// Releases everything base_init() allocated.
+//
+// A non-null tmp_fourier marks that base_init() ran, in which case
+// tmp_fourier_t and analysis_plan exist too. Buffers and plan are released
+// through MFCalls, the same interface that created them (and the one the
+// sampler (a) destructor uses).
+PowerSpectrumSampler_b::~PowerSpectrumSampler_b()
+{
+    if (tmp_fourier) {
+        Console::instance().print<LOG_INFO>("Cleaning up Powerspectrum sampler (b)");
+        // Label the second pointer correctly (it used to be printed as "tmp_fourier").
+        Console::instance().print<LOG_DEBUG>(format("tmp_fourier=%p tmp_fourier_t=%p") % tmp_fourier % tmp_fourier_t);
+
+        MFCalls::free(tmp_fourier);
+        MFCalls::free(tmp_fourier_t);
+        MFCalls::destroy_plan(analysis_plan);
+    }
+    if (tmp_x)
+        MFCalls::free(tmp_x);
+    if (tmp_t)
+        MFCalls::free(tmp_t);
+    // delete on a null pointer is a no-op, so no guard is needed.
+    delete flat_keys;
+}
+
+void PowerSpectrumSampler_b::base_init(MarkovState& state)
+{
+    // Set-up shared by initialize() and restore(): FFT buffers, per-bin work
+    // arrays, the analysis plan, the flat key view and the two acceptance
+    // counters registered in the state.
+    ConsoleContext<LOG_DEBUG> ctx("base init");
+
+    ctx.print(boost::format("Allocating Fourier buffer %dx%dx%d (sz=%d)") % localN0 % N1 % N2_HC % fourierLocalSize);
+
+    // Two complex buffers (transforms of x and t) ...
+    tmp_fourier = MFCalls::alloc_complex(fourierLocalSize);
+    tmp_fourier_t = MFCalls::alloc_complex(fourierLocalSize);
+    // ... and the two matching real input buffers.
+    const auto real_buffer_size = 2*fourierLocalSize;
+    tmp_x = MFCalls::alloc_real(real_buffer_size);
+    tmp_t = MFCalls::alloc_real(real_buffer_size);
+
+    // One accumulator slot per power-spectrum bin.
+    P0_array.resize(boost::extents[N_k]);
+    P1_array.resize(boost::extents[N_k]);
+
+    ctx.print(boost::format("Fourier buffer %p") % tmp_fourier);
+    ctx.print(boost::format("Allocating plan %dx%dx%d") % N0 % N1 % N2);
+
+    // The communicator argument only exists in the MPI flavour of the call.
+    analysis_plan = MFCalls::plan_dft_r2c_3d(
+        N0, N1, N2,
+        tmp_x,
+        tmp_fourier,
+#ifdef ARES_MPI_FFTW
+        comm->comm(),
+#endif
+        //FFTW_MPI_TRANSPOSED_OUT|
+        FFTW_DESTROY_INPUT|FFTW_MEASURE);
+
+    const auto key_count = keys->array->num_elements();
+    flat_keys = new FlatIntType(keys->array->data(), boost::extents[key_count]);
+
+    // Acceptance statistics live in the state; sample() refreshes them.
+    state.newElement("sampler_b_accepted", new SLong());
+    state.newElement("sampler_b_tried", new SLong());
+}
+
+void PowerSpectrumSampler_b::restore(MarkovState& state)
+{
+    // Restart path: restore the base and coloring parts of the state first,
+    // then rebuild the working resources.
+    ConsoleContext<LOG_INFO> ctx("restoration of power spectrum sampler (b)");
+    ctx.print("Restoring power spectrum sampler (b)");
+
+    restore_base(state);
+    restore_coloring(state);
+    base_init(state);
+}
+
+void PowerSpectrumSampler_b::initialize(MarkovState& state)
+{
+    // Fresh-start path: initialize the base and coloring parts, build the
+    // working resources, then start both acceptance counters from zero.
+    ConsoleContext<LOG_INFO> ctx("initialization of power spectrum sampler (b)");
+    Console& cons = Console::instance();
+
+    initialize_base(state);
+    initialize_coloring(state);
+    base_init(state);
+
+    // base_init() has just registered these two elements.
+    SLong *accepted_counter = state.get<SLong>("sampler_b_accepted");
+    accepted_counter->value = 0;
+    SLong *tried_counter = state.get<SLong>("sampler_b_tried");
+    tried_counter->value = 0;
+}
+
+
+// Metropolis update of the power spectrum from "x_field" and
+// "messenger_field".
+//
+// Steps, as implemented below:
+//   1. Fourier-transform both fields.
+//   2. Accumulate per bin the cross term Re(x_hat * conj(t_hat)) in P0_array
+//      and the auto term |x_hat|^2 in P1_array, then sum both over all ranks.
+//   3. On rank 0, for every bin with a positive auto term, propose
+//      u1 = mean + sigma * gaussian (redrawn until non-negative) and accept
+//      it with probability min(1, u1/u0), where u0 = sqrt(P * volume).
+//   4. Broadcast the spectrum, refresh s_field and publish the running
+//      acceptance counters in the state.
+// Nothing is done when the state flag "power_sampler_b_blocked" is set.
+//
+// The counters are only incremented on rank 0, which is also the only rank
+// that reports the acceptance ratio.
+void PowerSpectrumSampler_b::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_DEBUG> ctx("sampling of power spectrum (b)");
+    // Inputs are looked up before the "blocked" test, as they always were.
+    ArrayType& x_field = static_cast<ArrayType&>(state["x_field"]);
+    ArrayType& t_field = static_cast<ArrayType&>(state["messenger_field"]);
+    RandomGen *rng = state.get<RandomGen>("random_generator");
+    ArrayType1d::ArrayType& P_array = *P->array;
+    SDouble *messenger_tau = state.get<SDouble>("messenger_tau");
+    double tau = messenger_tau->value;
+
+    if (state.get<SBool>("power_sampler_b_blocked")->value)
+        return;
+
+#ifdef ARES_MPI_FFTW
+    copy_padded_data(*x_field.array, tmp_x);
+    copy_padded_data(*t_field.array, tmp_t);
+#else
+    ::memcpy(tmp_x, x_field.array->data(), Ntot * sizeof(MFCalls::real_type));
+    ::memcpy(tmp_t, t_field.array->data(), Ntot * sizeof(MFCalls::real_type));
+#endif
+
+    // The plan was built for tmp_x -> tmp_fourier; the second transform
+    // reuses it on the (tmp_t, tmp_fourier_t) pair.
+    ctx.print("Fourier analysis (1)");
+    MFCalls::execute(analysis_plan);
+    ctx.print("Fourier analysis (2)");
+    MFCalls::execute_r2c(analysis_plan, tmp_t, tmp_fourier_t);
+    
+    ctx.print("Compute inverse-gamma parameter");
+
+    ctx.print(boost::format("local_fourier_elements = %d") % local_fourier_elements);
+    int *adjust = adjustMul->array->data();
+    
+    std::fill(P0_array.begin(), P0_array.end(), 0);
+    std::fill(P1_array.begin(), P1_array.end(), 0);
+    
+//#pragma omp parallel for schedule(static)
+    for (long i = 0; i < local_fourier_elements; i++) {    
+        FCalls::complex_type& x_hat = tmp_fourier[i];
+        FCalls::complex_type& t_hat = tmp_fourier_t[i];
+        double Pelt_cross = x_hat[0]*t_hat[0] + x_hat[1]*t_hat[1];
+        double Pelt_auto  = x_hat[0]*x_hat[0] + x_hat[1]*x_hat[1];
+        
+        // Using "adjust" increases memory bandwidth consumption. Not great...
+        // OTOH it is very convenient and this loop is not the most time-consuming part.
+        P0_array[ (*flat_keys)[i] ] += adjust[i] * Pelt_cross;
+        P1_array[ (*flat_keys)[i] ] += adjust[i] * Pelt_auto;
+    }
+
+    // No helper function written here. Ask MPI to reduce the arrays in-place.
+    comm->all_reduce_t(MPI_IN_PLACE, P0_array.data(), P0_array.num_elements(), 
+                       MPI_SUM);
+    comm->all_reduce_t(MPI_IN_PLACE, P1_array.data(), P1_array.num_elements(), 
+                       MPI_SUM);
+
+    int accepted = 0, tried = 0;
+    double normalization = tau * Ntot;
+    
+    if (comm->rank() == 0) {
+        ctx.print("Accumulated, now create plausible sample");
+#pragma omp parallel for schedule(static) reduction(+:accepted,tried)
+        for (int i = 0; i < N_k; i++) {
+            // Turn the accumulators into the proposal mean (P0) and width
+            // (P1); bins without power are skipped and not counted as tried.
+            if (P1_array[i] > 0) {
+                double s = 1/P1_array[i];
+                P0_array[i] *= s;
+                P1_array[i] = sqrt(s * normalization);
+            } else {
+                continue;
+            }
+            
+            double u0 = sqrt(P_array[i] * volume);
+            double u1 = -1;
+            double mean = P0_array[i];
+            double sigma = P1_array[i];
+            assert(!std::isnan(u0));
+            assert(!std::isnan(mean));
+            assert(!std::isnan(sigma));
+            ctx.print(format(" k = %lg, mean = %lg, sigma = %lg")  % (*k->array)[i]% mean % sigma);
+            if (mean < 0) mean = 0;
+            while(u1 < 0) 
+                u1 = mean + sigma*rng->get().gaussian(); ///NOTE: sample from truncated Gaussian
+        
+            // Acceptance probability, capped at one.
+            double PA = u1/u0;
+            if(PA>1.) 
+                PA=1.;
+            
+            double u = rng->get().uniform();
+            if (u < PA) {        
+                P_array[i] = u1*u1 / volume;
+                accepted++;
+            }
+            tried++;
+        }
+    }
+    
+    ctx.print("Broadcast data");
+    P_sync.mpiBroadcast(*comm);
+        
+    total_accepted += accepted;
+    total_tried += tried;
+    
+    // Force update s_field with the new P
+    update_s_field_from_x(state);
+    
+    state.get<SLong>("sampler_b_accepted")->value = total_accepted;
+    state.get<SLong>("sampler_b_tried")->value = total_tried;
+
+    if (comm->rank() == 0) {
+        // Report 0% rather than NaN when no bin has been tried so far.
+        double acceptance_percent = 0;
+        if (total_tried > 0)
+            acceptance_percent = double(total_accepted)*100/total_tried;
+        Console::instance().print<LOG_VERBOSE>(format("PSpec sampler (b) total acceptance ratio: %2.0f %%") % acceptance_percent);
+    }
+}
+
+
--- a/libLSS/samplers/ares/powerspectrum_b_sampler.hpp
+++ b/libLSS/samplers/ares/powerspectrum_b_sampler.hpp
@ -0,0 +1,44 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_b_sampler.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_POWERSPECTRUM_B_SAMPLER_HPP
+#define __LIBLSS_POWERSPECTRUM_B_SAMPLER_HPP
+
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Power-spectrum sampler "b", built on PowerSpectrumSampler_Coloring.
+     *
+     * The buffers, plan and key view are allocated by base_init() and
+     * released by the destructor.
+     */
+    class PowerSpectrumSampler_b: public PowerSpectrumSampler_Coloring {
+    protected:
+        // One-dimensional, non-owning view over integer array storage.
+        typedef boost::multi_array_ref< IArrayType::ArrayType::element, 1> FlatIntType;
+
+        // Complex outputs of the two real-to-complex transforms done in
+        // sample() (of "x_field" and "messenger_field" respectively).
+        MFCalls::complex_type *tmp_fourier, *tmp_fourier_t;
+        // Matching real input buffers.
+        MFCalls::real_type *tmp_x, *tmp_t;
+        // FFT plan created for (tmp_x -> tmp_fourier); only assigned in base_init().
+        MFCalls::plan_type analysis_plan;
+        // Flat view over the key array, heap-allocated in base_init().
+        FlatIntType *flat_keys;
+        // Running Metropolis statistics, accumulated across sample() calls.
+        int total_accepted, total_tried;
+
+        // Per-bin work arrays, resized to N_k in base_init().
+        ArrayType1d::ArrayType P0_array, P1_array;
+        
+        // Common set-up called from both initialize() and restore().
+        void base_init(MarkovState& state);
+    public:
+        PowerSpectrumSampler_b(MPI_Communication *comm);
+        virtual ~PowerSpectrumSampler_b();
+
+        virtual void restore(MarkovState& state);
+        virtual void initialize(MarkovState& state);
+        virtual void sample(MarkovState& state);    
+    };
+
+}
+
+#endif
--- a/libLSS/samplers/ares/powerspectrum_c_sampler.cpp
+++ b/libLSS/samplers/ares/powerspectrum_c_sampler.cpp
@ -0,0 +1,196 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_c_sampler.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <CosmoTool/algo.hpp>
+#include <functional>
+#include <cmath>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/mcmc/state_element.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+#include "libLSS/samplers/ares/powerspectrum_c_sampler.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/samplers/rgen/slice_sweep.hpp"
+#include "libLSS/samplers/ares/ares_bias.hpp"
+
+static const int ROOT = 0;
+static const size_t LARGE_SCALE_MODE_COUNT=14;
+
+using boost::format;
+using namespace LibLSS;
+using LibLSS::ARES::extract_bias;
+namespace ph = std::placeholders;
+
+// Records the communicator (through the base class) and gives every member
+// declared by this class a defined starting value, in declaration order.
+// Previously only counter_evaluations was initialized. The real set-up is
+// done by base_init(); init_sampler is overwritten by initialize()/restore().
+PowerSpectrumSampler_c::PowerSpectrumSampler_c(MPI_Communication *comm0)
+    : PowerSpectrumSampler_Coloring(comm0),
+      localNtot(0),
+      total_accepted(0), total_tried(0),
+      init_sampler(false),
+      counter_evaluations(0),
+      sigma_init(0)
+{
+}
+
+// Nothing is released here: counter_evaluations and sigma_init are handed
+// to the MarkovState in base_init(), which presumably owns them from then
+// on -- TODO confirm.
+PowerSpectrumSampler_c::~PowerSpectrumSampler_c()
+{
+}
+
+void PowerSpectrumSampler_c::base_init(MarkovState& state)
+{
+    // Set-up shared by initialize() and restore(): cache the catalog count
+    // and local grid size, and register two per-bin arrays in the state.
+    ConsoleContext<LOG_DEBUG> ctx("base init");
+
+    Ncatalog = state.get<SLong>("NCAT")->value;
+    localNtot = localN0 * N1 * N2;
+
+    // Both arrays have one slot per power-spectrum bin.
+    const auto bin_count = P->array->num_elements();
+
+    // Counter of the number of posterior evaluations required for each
+    // mode; it is reset at each save.
+    counter_evaluations = new IArrayType1d(boost::extents[bin_count]);
+    state.newElement("spectrum_c_eval_counter", counter_evaluations, true);
+    counter_evaluations->setResetOnSave(0);
+    counter_evaluations->fill(0);
+
+    // Initial step size of the slice sampler for each bin; filled in on the
+    // first call to sample().
+    sigma_init = new ArrayType1d(boost::extents[bin_count]);
+    state.newElement("spectrum_c_init_sigma", sigma_init);
+    sigma_init->fill(0);
+}
+
+// Restart path: restore the base and coloring parts of the state, rebuild
+// the per-bin arrays, and keep sample() from recomputing the initial
+// slice-sampler step sizes.
+//
+// The log messages used to say "(b)", copied from the sampler (b)
+// implementation; they now name this sampler.
+void PowerSpectrumSampler_c::restore(MarkovState& state)
+{
+    ConsoleContext<LOG_INFO> ctx("restoration of power spectrum sampler (c)");
+
+    ctx.print("Restoring power spectrum sampler (c)");
+
+    restore_base(state);
+    restore_coloring(state);
+
+    base_init(state);
+
+    init_sampler = false;
+}
+
+void PowerSpectrumSampler_c::initialize(MarkovState& state)
+{
+    // Fresh-start path: initialize the base and coloring parts, build the
+    // per-bin arrays, and ask the first sample() call to compute the initial
+    // slice-sampler step sizes.
+    ConsoleContext<LOG_INFO> ctx("initialization of power spectrum sampler (c)");
+    Console& cons = Console::instance();
+
+    initialize_base(state);
+    initialize_coloring(state);
+    base_init(state);
+
+    init_sampler = true;
+}
+
+
+// Log-posterior of bin k taking the value P_trial, all other bins unchanged.
+//
+// Side effects: the stored spectrum is overwritten at index k, the signal
+// field is rebuilt from it, and the evaluation counter of bin k is bumped.
+// A negative P_trial returns -infinity before any of that happens.
+//
+// The sum over cells is reduced onto rank ROOT only, so the value returned
+// on the other ranks does not include the data term.
+double PowerSpectrumSampler_c::log_likelihood(MarkovState& state, int k, double P_trial)
+{
+    if (P_trial < 0) {
+        return -std::numeric_limits<double>::infinity();
+    }
+
+    // Reuse the system power spectrum as scratch space for the trial value.
+    (*P->array)[k] = P_trial;
+    update_s_field_from_x(state, (*P));
+
+    // Now compute the full likelihood over all catalogs.
+    double *density = state.get<ArrayType>("s_field")->array->data();
+    double heat = state.getScalar<double>("ares_heat");
+
+    double total_chi2 = 0;
+    double local_chi2 = 0;
+
+    for (int cat = 0; cat < Ncatalog; cat++) {
+        SelArrayType& sel_field = *state.get<SelArrayType>(format("galaxy_synthetic_sel_window_%d") % cat);
+        ArrayType& g_field = *state.get<ArrayType>(format("galaxy_data_%d") % cat);
+        double bias = extract_bias(state, cat);
+        double nmean = state.get<SDouble>(format("galaxy_nmean_%d") % cat)->value;
+        double *selection = sel_field.array->data();
+        double *observed = g_field.array->data();
+
+        double cat_chi2 = 0;
+//#pragma omp simd aligned(s,R,gdata)
+#pragma omp parallel for schedule(static) reduction(+:cat_chi2)
+        for (long cell = 0; cell < localNtot; cell++) {
+            // Cells outside the selection do not contribute.
+            if (selection[cell] > 0) {
+                cat_chi2 += CosmoTool::square(observed[cell] - nmean * selection[cell] * (1 + bias * density[cell])) / (selection[cell]*nmean);
+            }
+        }
+
+        local_chi2 += cat_chi2;
+    }
+
+    comm->reduce_t(&local_chi2, &total_chi2, 1, MPI_SUM, ROOT);
+
+    (*counter_evaluations->array)[k]++;
+
+    // Tempered data term plus the -log(P) prior term.
+    return -0.5*heat*total_chi2 - std::log(P_trial);
+}
+
+// Slice-sample the first few (largest-scale) power-spectrum bins.
+//
+// Runs only every 10th MCMC step and only when the state flag
+// "power_sampler_c_blocked" is not set. For each of the first
+// LARGE_SCALE_MODE_COUNT bins that contain modes, a new value is drawn with
+// slice_sweep_double() using log_likelihood() as the target, then broadcast
+// from ROOT. The signal field is rebuilt from the final spectrum.
+//
+// On the first call after initialize(), the slice-sampler step of every bin
+// is set to P / sqrt(number of modes) and stored in sigma_init.
+void PowerSpectrumSampler_c::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_INFO_SINGLE> ctx("sampling of power spectrum (c)");
+    // Inputs are looked up before the early-exit tests, as they always were.
+    ArrayType& x_field = static_cast<ArrayType&>(state["x_field"]);
+    RandomGen *rng = state.get<RandomGen>("random_generator");
+    ArrayType1d::ArrayType& P_array = *P->array;
+    long step = state.get<SLong>("MCMC_STEP")->value;
+
+    if (state.get<SBool>("power_sampler_c_blocked")->value)
+        return;
+    if ((step % 10) != 0) {
+        return;
+    }
+
+    ctx.print("Fourier analysis (1)");
+    copy_padded_data(*x_field.array, tmp_real);
+
+    MFCalls::execute_r2c(analysis_plan, tmp_real, tmp_fourier);
+
+    int *counts = key_counts->array->data();
+    const size_t bin_count = P_array.size();
+
+    ArrayType1d::ArrayType& sigma_init_array = *sigma_init->array;
+    if (init_sampler) {
+      ctx.print("initial guess for the step for slice sampler...");
+       for (size_t i = 0 ; i < bin_count ; i++) {
+         if (counts[i] == 0)
+           sigma_init_array[i] = 0;
+         else
+           sigma_init_array[i] = (P_array[i]) / std::sqrt(double(counts[i]));
+       }
+       init_sampler = false;
+    }
+
+    // Number of bins actually sampled; at most LARGE_SCALE_MODE_COUNT, so
+    // the conversion to int cannot overflow.
+    const int sampled_bins = static_cast<int>(std::min(LARGE_SCALE_MODE_COUNT, bin_count));
+
+    for (int i = 0; i < sampled_bins; i++) {
+      // Skip zero mode
+      if (counts[i] == 0)
+        continue;
+
+      double cosmic_var = sigma_init_array[i];
+      ctx.print(format("Finding P_array(k=%d / %d) cvar=%g") % i % P_array.size() % cosmic_var);
+
+      auto posterior_fun =
+             std::bind(&PowerSpectrumSampler_c::log_likelihood,
+                         this, boost::ref(state), i, ph::_1);
+
+      // We need the slice_sweep_double algo here. Cosmic variance tends to
+      // underestimate the width of the posterior quite a bit.
+      if (cosmic_var >0)
+        P_array[i] =
+          slice_sweep_double(comm, rng->get(),
+              posterior_fun,
+              P_array[i], cosmic_var);
+
+      // Every rank takes ROOT's value for this bin before moving on.
+      comm->broadcast_t(&P_array[i], 1, ROOT);
+    }
+
+    // log_likelihood() leaves s_field built from trial spectra; rebuild it
+    // from the final one.
+    update_s_field_from_x(state);
+
+}
--- a/libLSS/samplers/ares/powerspectrum_c_sampler.hpp
+++ b/libLSS/samplers/ares/powerspectrum_c_sampler.hpp
@ -0,0 +1,47 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/powerspectrum_c_sampler.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_POWERSPECTRUM_C_SAMPLER_HPP
+#define __LIBLSS_POWERSPECTRUM_C_SAMPLER_HPP
+
+#include <iostream>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Power-spectrum sampler "c", built on PowerSpectrumSampler_Coloring.
+     *
+     * sample() draws new values for the leading power-spectrum bins with a
+     * slice sampler whose target is log_likelihood().
+     */
+    class PowerSpectrumSampler_c: public PowerSpectrumSampler_Coloring {
+    protected:
+        // One-dimensional, non-owning view over integer array storage.
+        typedef boost::multi_array_ref< IArrayType::ArrayType::element, 1> FlatIntType;
+
+        // Number of grid cells held by this rank; set in base_init().
+        long localNtot;
+        // NOTE(review): not referenced by powerspectrum_c_sampler.cpp;
+        // possibly left over from sampler (b).
+        int total_accepted, total_tried;
+
+        // True after initialize(), false after restore(); when true, the
+        // next sample() call computes the initial slice-sampler steps.
+        bool init_sampler;
+        // Per-bin count of log_likelihood() evaluations; registered in the
+        // state by base_init().
+        IArrayType1d *counter_evaluations;
+        // Per-bin initial step of the slice sampler; registered in the state
+        // by base_init().
+        ArrayType1d *sigma_init;
+
+        // Common set-up called from both initialize() and restore().
+        void base_init(MarkovState& state);
+        
+        // Log-posterior of bin k taking value P_trial; overwrites the stored
+        // spectrum at index k and rebuilds the signal field as a side effect.
+        double log_likelihood(MarkovState& state, int k, double P_trial);
+        
+    public:
+        PowerSpectrumSampler_c(MPI_Communication *comm);
+        virtual ~PowerSpectrumSampler_c();
+
+        virtual void restore(MarkovState& state);
+        virtual void initialize(MarkovState& state);
+        virtual void sample(MarkovState& state);    
+    };
+
+}
+
+#endif
--- a/libLSS/samplers/ares/synthetic_selection.cpp
+++ b/libLSS/samplers/ares/synthetic_selection.cpp
@ -0,0 +1,102 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/synthetic_selection.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <boost/format.hpp>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/samplers/core/gig_sampler.hpp"
+#include "libLSS/samplers/ares/synthetic_selection.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/fused_assign.hpp"
+
+using namespace LibLSS;
+using boost::format;
+
+using boost::extents;
+
+typedef boost::multi_array_types::extent_range range;
+
+
+void SyntheticSelectionUpdater::initialize(MarkovState& state)
+{
+    // Read the grid geometry from the state and register one synthetic
+    // selection window per catalog, named "galaxy_synthetic_sel_window_<c>".
+    ConsoleContext<LOG_DEBUG> ctx("initialization of Selection updater");
+
+    Ncat = static_cast<SLong&>(state["NCAT"]);
+
+    const long N0 = static_cast<SLong&>(state["N0"]);
+    const long localN0 = static_cast<SLong&>(state["localN0"]);
+    // Read but not used below.
+    const long startN0 = static_cast<SLong&>(state["startN0"]);
+    const long N1 = static_cast<SLong&>(state["N1"]);
+    const long N2 = static_cast<SLong&>(state["N2"]);
+
+    // Global data-grid dimensions and the local [begin, end) pair per axis.
+    long Ndata[3];
+    long localNdata[6];
+    state.getScalarArray<long,3>("Ndata", Ndata);
+    state.getScalarArray<long,6>("localNdata", localNdata);
+
+    Ntot = N0*N1*N2;
+    localNtot = localN0*N1*N2;
+
+    for (int c = 0; c < Ncat; c++) {
+        // The window covers the index ranges given by localNdata ...
+        SelArrayType *sel_window = new SelArrayType(
+            extents[range(localNdata[0],localNdata[1])]
+                   [range(localNdata[2],localNdata[3])]
+                   [range(localNdata[4],localNdata[5])]);
+        state.newElement(format("galaxy_synthetic_sel_window_%d") % c, sel_window);
+
+        // ... and records the global dimensions from Ndata.
+        sel_window->setRealDims(ArrayDimension(Ndata[0], Ndata[1], Ndata[2]));
+    }
+}
+
+// Restoring needs exactly the same set-up as a fresh start, so this simply
+// forwards to initialize().
+void SyntheticSelectionUpdater::restore(MarkovState& state) {
+    initialize(state);
+}
+
+// Rebuild the synthetic selection window of every catalog.
+//
+// For each catalog the original window "galaxy_sel_window_<c>" is copied
+// into "galaxy_synthetic_sel_window_<c>", then every foreground listed in
+// "catalog_foreground_maps_<c>" multiplies it by (1 - f * val), where f is
+// the field "foreground_3d_<id>" and val the matching entry of
+// "catalog_foreground_coefficient_<c>".
+//
+// The foreground id used to be stored in a variable named `c`, shadowing
+// the catalog index, so the log line reported the foreground id as the
+// catalog number. It now has its own name and the message prints the
+// foreground id and the real catalog index.
+void SyntheticSelectionUpdater::sample(MarkovState& state)
+{
+    ConsoleContext<LOG_VERBOSE> ctx("processing of 3d selection (including foregrounds)");
+    
+    for (int c = 0; c < Ncat; c++) {
+        SelArrayType *original_selection_grid = state.get<SelArrayType>(format("galaxy_sel_window_%d") % c);
+        SelArrayType *sel_grid = state.get<SelArrayType>(format("galaxy_synthetic_sel_window_%d") % c);
+        IArrayType1d *fgmap = state.get<IArrayType1d>(format("catalog_foreground_maps_%d") % c);
+        ArrayType1d *fgvals = state.get<ArrayType1d>(format("catalog_foreground_coefficient_%d") % c);
+        int NcatForegrounds = fgmap->array->num_elements();
+        
+        // Needed for catalogs without any foreground: the loop below then
+        // never runs and the synthetic window is just the original one.
+        ctx.format("Copy initial selection for catalog %d", c);
+        sel_grid->eigen() = original_selection_grid->eigen();
+        
+        for (int f = 0; f < NcatForegrounds; f++) {
+            // Id of the foreground map, i.e. the suffix of "foreground_3d_<id>".
+            int fg_id = (*fgmap->array)[f];
+            double val = (*fgvals->array)[f];
+            
+            ctx.print(format("Applying foreground %d (value %lg) to selection of catalog %d") % fg_id % val % c);
+            
+            ArrayType *fgField = state.get<ArrayType>(format("foreground_3d_%d") % fg_id);
+
+            auto mergingFunction = [val](double s,double f) { return s*(1 - f * val); };
+
+            // copy_array is parallelized, hopefully later vectorized  
+            if (f == 0) {
+                // First foreground: start from the original window.
+                LibLSS::copy_array(*sel_grid->array, 
+                  b_fused<double>(*original_selection_grid->array,
+                                  *fgField->array, 
+                                  mergingFunction
+                                 )
+                );
+            } else {
+                // Later foregrounds: accumulate on top of the current result.
+                LibLSS::copy_array(*sel_grid->array, 
+                  b_fused<double>(*sel_grid->array, 
+                                  *fgField->array, 
+                                  mergingFunction
+                                 )
+                );
+            }
+        }
+    }
+}
--- a/libLSS/samplers/ares/synthetic_selection.hpp
+++ b/libLSS/samplers/ares/synthetic_selection.hpp
@ -0,0 +1,33 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/ares/synthetic_selection.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_SYNTHETIC_SELECTION_UPDATER_HPP
+#define __LIBLSS_SYNTHETIC_SELECTION_UPDATER_HPP
+
+#include <boost/multi_array.hpp>
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Markov "sampler" that rebuilds the per-catalog synthetic selection
+     * windows; the methods are defined out of line.
+     */
+    class SyntheticSelectionUpdater: public MarkovSampler {
+    protected:
+        // Number of catalogs, read from the state entry "NCAT" by initialize().
+        int Ncat;
+        // Global (N0*N1*N2) and rank-local (localN0*N1*N2) cell counts, set
+        // by initialize().
+        long Ntot, localNtot;
+    public:
+
+        virtual void initialize(MarkovState& state);
+        virtual void restore(MarkovState& state);
+        virtual void sample(MarkovState& state);
+
+    };
+    
+}
+
+#endif
--- a/libLSS/samplers/core/gaussian_ratio.tcc
+++ b/libLSS/samplers/core/gaussian_ratio.tcc
@ -0,0 +1,25 @@
+// Draw one Gaussian deviate by ratio-of-uniforms rejection.
+//
+// Each attempt consumes two uniform() draws, in the order u then v. A cheap
+// quadratic test on Q accepts or rejects most candidates; the exact test
+// involving log(u) only runs when Q lies between the two bounds.
+inline double RandomNumber::gaussian_ratio()
+{
+    /* Constants from Leva */
+    const double s = 0.449871;
+    const double t = -0.386595;
+    const double a = 0.19600;
+    const double b = 0.25472;
+    const double r1 = 0.27597;
+    const double r2 = 0.27846;
+
+    for (;;) {
+        // u lies in (0, 1] when uniform() returns values in [0, 1).
+        const double u = 1 - uniform();
+        const double v = (uniform() - 0.5) * 1.7156;
+
+        const double x = u - s;
+        const double y = std::abs(v) - t;
+        const double Q = x * x + y * (a * y - b * x);
+
+        // Same predicate as the classic do/while form, kept verbatim.
+        const bool rejected = Q >= r1 && (Q > r2 || (v*v) > (-4*u*u*log(u)) );
+        if (!rejected)
+            return v/u;
+    }
+}
+
--- a/libLSS/samplers/core/gig_sampler.cc
+++ b/libLSS/samplers/core/gig_sampler.cc
@ -0,0 +1,105 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <iomanip>
+#include <math.h>
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <cfloat>
+#include <string>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include "gig_sampler.hpp"
+
+using namespace std;
+
+using namespace LibLSS;
+
+// psi(x) = -alpha*(cosh(x) - 1) - lambd*(exp(x) - x - 1); vanishes at x = 0.
+static double psi(double x, double alpha, double lambd)
+{
+  const double cosh_term = -alpha*(cosh(x) - 1.);
+  const double exp_term = lambd*(exp(x) - x - 1.);
+  return cosh_term - exp_term;
+}
+
+// Derivative of psi with respect to x: -alpha*sinh(x) - lambd*(exp(x) - 1).
+static double psi_prime(double x, double alpha,double lambd)
+{
+  const double sinh_term = -alpha*sinh(x);
+  const double exp_term = lambd*(exp(x) - 1.);
+  return sinh_term - exp_term;
+}
+
+// Rejection sampler behind GIG_sampler_3params.
+//
+// A variable X is drawn from a three-piece proposal (a flat piece between
+// -s_prime and t_prime, and one exponential tail on each side) and accepted
+// against psi(X); the function returns
+//   (lambd/omega + sqrt(1 + lambd^2/omega^2)) * exp(X).
+//
+// lambd, omega: parameters forwarded by GIG_sampler_3params, which only
+//               calls this with lambd >= 0 and omega = sqrt(a*b).
+// rng:          source of the uniform deviates.
+// NOTE(review): omega == 0 divides by zero in the return expression; the
+//               caller is presumably expected to rule that out -- confirm.
+static double GIG_sampler_Devroy(double lambd, double omega,  RandomNumber& rng)
+{
+  double alpha=sqrt(omega*omega+lambd*lambd)-lambd;
+
+  // Right break point t, and the level rho = psi(t) reached there.
+  double rho=4.;
+  double t=sqrt(2.*rho/(alpha+lambd));
+  rho=psi(t,alpha,lambd);
+
+  // Left break point s: five Newton steps on psi(-s) = rho, started at s = t.
+  double s=t;
+  for (int step=0; step<5; step++)
+    s=(-s+(rho-psi(-s,alpha,lambd))/psi_prime(-s,alpha,lambd))*(-1.);
+
+  // Values and slopes of -psi at the two break points.
+  double eta     = -psi(t,alpha,lambd);
+  double theta   = -psi_prime(t,alpha,lambd);
+  double phi     = -psi(-s,alpha,lambd);
+  double xi      =  psi_prime(-s,alpha,lambd);
+
+  // Weights of the left tail (p), right tail (r) and flat piece (q).
+  double p       =  1./xi;
+  double r       =  1./theta;
+
+  double t_prime =  t-r*eta;
+  double s_prime =  s-p*phi;
+  double q       =  t_prime+s_prime;
+
+  double X=0.;
+  double chi=0.;
+
+  while(true)
+    {
+        // U picks the proposal piece, V places the draw inside it and
+        // W drives the accept/reject decision.
+        double U=rng.uniform();
+        double V=rng.uniform();
+        double W=rng.uniform();
+
+        if(U<q/(p+q+r))
+          {
+            // Flat piece on [-s_prime, t_prime].
+            X=-s_prime+q*V;
+            chi=0.;
+          }
+        else if (U<(q+r)/(p+q+r))
+          {
+            // Right exponential tail.
+            X=t_prime+r*log(1./V);
+            chi=(-eta-theta*(X-t));
+          }
+        else
+          {
+            // Left exponential tail.
+            X=-s_prime-p*log(1./V);
+            chi=(-phi+xi*(X+s));
+          }
+        // chi is the log of the proposal envelope at X; accept in log space.
+        if (log(W)+chi <= (psi(X,alpha,lambd))) break;
+    }
+  return ((lambd/omega+sqrt(1.+lambd*lambd/omega/omega))*exp(X));
+}
+
+/// Provides samples of the three-parameter Generalized Inverse Gaussian
+/// (GIG) distribution
+///   log(P) = -1/2 * (a*x + b/x) + (p - 1)*log(x)
+/// using the uniform deviates supplied by `rng`.
+double LibLSS::GIG_sampler_3params(double a,double b,double p, RandomNumber& rng)
+{
+    const double omega = sqrt(b*a);
+
+    // Only a positive order has to be drawn directly (see Devroye 2014);
+    // otherwise the draw is made for the negated order and inverted.
+    const double X = (p > 0.)
+        ? GIG_sampler_Devroy(p, omega, rng)
+        : 1./GIG_sampler_Devroy(-p, omega, rng);
+
+    // Rescale from the (lambda, omega) form back to the (a, b, p) form.
+    return sqrt(b/a)*X;
+}
--- a/libLSS/samplers/core/gig_sampler.hpp
+++ b/libLSS/samplers/core/gig_sampler.hpp
@ -0,0 +1,19 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/gig_sampler.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LSS_GIG_SAMPLER_HPP
+#define __LSS_GIG_SAMPLER_HPP
+
+#include "random_number.hpp"
+
+namespace LibLSS {
+  // Draws one sample of the three-parameter Generalized Inverse Gaussian
+  // distribution, log(P) = -1/2*(a*x + b/x) + (p-1)*log(x), using `rng` as
+  // the source of uniform deviates (implemented in gig_sampler.cc).
+  double GIG_sampler_3params(double a,double b,double p, RandomNumber& rng);
+}
+
+#endif
--- a/libLSS/samplers/core/main_loop.cpp
+++ b/libLSS/samplers/core/main_loop.cpp
@ -0,0 +1,122 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/main_loop.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/core/main_loop.hpp"
+#include "libLSS/tools/timing_db.hpp"
+
+using namespace LibLSS;
+using std::string;
+
+// Snapshot numbering starts at 0; the splash hook runs once at construction.
+MainLoop::MainLoop() : mcmc_id(0) {
+  show_splash();
+}
+
+// Nothing to release explicitly.
+MainLoop::~MainLoop() {}
+
+// Intentionally empty.
+void MainLoop::show_splash() {}
+
+// Calls init_markov() on every registered sampler, in registration order.
+void MainLoop::initialize() {
+  Console &console = Console::instance();
+
+  console.print<LOG_STD>("Initializing samplers");
+  console.indent();
+
+  for (auto &entry : mclist)
+    entry.first->init_markov(state);
+
+  console.unindent();
+  console.print<LOG_STD>("Done");
+}
+
+// Writes the current state to "mcmc_<id>.h5" and advances the snapshot id.
+void MainLoop::snap() {
+  MPI_Communication *comm = MPI_Communication::instance();
+  std::shared_ptr<H5::H5File> snapshot;
+
+  // Only rank 0 creates the file; the other ranks hand over an empty pointer.
+  if (comm->rank() == 0) {
+    const string fname = boost::str(boost::format("mcmc_%d.h5") % mcmc_id);
+    snapshot = std::make_shared<H5::H5File>(fname, H5F_ACC_TRUNC);
+  }
+
+  state.mpiSaveState(snapshot, comm, false, true);
+  ++mcmc_id;
+}
+
+// Writes the per-rank restart file "restart.h5_<rank>".
+//
+// The state and the timing database are first written to a "_build" file,
+// all ranks synchronize, and the build file is then renamed to its final
+// name so that an interrupted write never replaces a previous restart file.
+void MainLoop::save() {
+  using boost::format;
+  using boost::str;
+  MPI_Communication *comm = MPI_Communication::instance();
+  string fname_final = str(format("restart.h5_%d") % comm->rank());
+  string fname_build = fname_final + "_build";
+
+  {
+    // Scoped so that the file is closed before it is renamed.
+    H5::H5File f(fname_build, H5F_ACC_TRUNC);
+    state.saveState(f);
+    timings::save(f);
+  }
+  comm->barrier();
+
+  // A failed rename leaves no usable restart file: report it instead of
+  // dropping the error silently.
+  if (rename(fname_build.c_str(), fname_final.c_str()) != 0) {
+    Console::instance().print<LOG_WARNING>(
+        format("Unable to rename %s to %s") % fname_build % fname_final);
+  }
+}
+
+// Writes the per-rank crash dump "crash_file.h5_<rank>".
+//
+// Same build-then-rename scheme as save(), but without the barrier and
+// without the timing database.
+void MainLoop::save_crash() {
+  using boost::format;
+  using boost::str;
+  MPI_Communication *comm = MPI_Communication::instance();
+  string fname_final = str(format("crash_file.h5_%d") % comm->rank());
+  string fname_build = fname_final + "_build";
+
+  {
+    // Scoped so that the file is closed before it is renamed.
+    H5::H5File f(fname_build, H5F_ACC_TRUNC);
+    state.saveState(f);
+  }
+
+  // Report a failed rename instead of dropping the error silently.
+  if (rename(fname_build.c_str(), fname_final.c_str()) != 0) {
+    Console::instance().print<LOG_WARNING>(
+        format("Unable to rename %s to %s") % fname_build % fname_final);
+  }
+}
+
+// Runs one pass over the sampler list: each sampler is invoked as many times
+// as its repeat count, and the progress display advances once per sampler.
+void MainLoop::run() {
+  ConsoleContext<LOG_STD> ctx("MainLoop::run");
+  Progress<LOG_STD> progress = Console::instance().start_progress<LOG_STD>(
+      "Main loop iteration", mclist.size(), 30);
+  int done = 0;
+
+  for (auto &entry : mclist) {
+    const int repeats = entry.second;
+    for (int pass = 0; pass < repeats; pass++)
+      entry.first->sample(state);
+    progress.update(++done);
+  }
+  progress.destroy();
+}
+
+// Restores the chain from a restart file.
+//
+// fname:    file name; unless `flexible` is set, "_<rank>" is appended.
+// flexible: forwarded to MarkovState::restoreState; also uses `fname` as is.
+void MainLoop::restore(const std::string &fname, bool flexible) {
+  Console &cons = Console::instance();
+  MPI_Communication *comm = MPI_Communication::instance();
+
+  string fname_full = fname;
+  if (!flexible)
+    fname_full = boost::str(boost::format("%s_%d") % fname % comm->rank());
+
+  // Flags value 0 is HDF5's read-only access mode.
+  H5::H5File f(fname_full, 0);
+  ConsoleContext<LOG_INFO> ctx("restoration of MCMC state");
+
+  if (flexible)
+    cons.print<LOG_WARNING>("Using flexible mechanism");
+
+  // Samplers register their state elements first, then the file fills them.
+  ctx.print("Initialize variables");
+  for (auto &entry : mclist)
+    entry.first->restore_markov(state);
+
+  ctx.print("Load markov state from file");
+  state.restoreState(f, flexible);
+
+  timings::load(f);
+}
--- a/libLSS/samplers/core/main_loop.hpp
+++ b/libLSS/samplers/core/main_loop.hpp
@ -0,0 +1,109 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/main_loop.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_SAMPLERS_MAINLOOP_HPP
+#define __LIBLSS_SAMPLERS_MAINLOOP_HPP
+
+#include <utility>
+#include <list>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+
+namespace LibLSS {
+
+    class BlockLoop;
+    /**
+     * Ordered list of (sampler, repeat count) pairs, filled with operator<<.
+     *
+     * Samplers given as shared_ptr are shared with the caller. A sampler
+     * given by reference is stored without taking ownership, so the caller
+     * must keep it alive for as long as the list is used.
+     */
+    class BlockSampler {
+    public:
+        typedef std::list<std::pair<std::shared_ptr<MarkovSampler>,int> > MCList;
+    protected:
+        MCList mclist;
+        friend class BlockLoop;
+    public:
+        // The class is used as a polymorphic base (virtual adder()), so
+        // destruction through a base pointer must be well defined.
+        virtual ~BlockSampler() {}
+
+        // Appends the content of this block to `s`.
+        virtual void adder(BlockSampler& s) const {
+            ConsoleContext<LOG_DEBUG> ctx("adder classic");
+            s.mclist.insert(s.mclist.end(), mclist.begin(), mclist.end());
+        }
+
+        // Appends a shared sampler (temporary pointer), executed once per pass.
+        BlockSampler& operator<<(std::shared_ptr<MarkovSampler>&& s) {
+            ConsoleContext<LOG_DEBUG> ctx("inserter shared_ptr");
+            mclist.push_back(std::make_pair(s,1));
+            return *this;
+        }
+
+        // Appends a shared sampler (named pointer), executed once per pass.
+        BlockSampler& operator<<(std::shared_ptr<MarkovSampler>& s) {
+            ConsoleContext<LOG_DEBUG> ctx("inserter shared_ptr");
+            mclist.push_back(std::make_pair(s,1));
+            return *this;
+        }
+
+        // Appends a sampler owned by the caller: the no-op deleter keeps the
+        // shared_ptr from ever destroying it.
+        BlockSampler& operator<<(MarkovSampler& s) {
+            ConsoleContext<LOG_DEBUG> ctx("inserter");
+            mclist.push_back(std::make_pair(std::shared_ptr<MarkovSampler>(&s, [](void *) {}), 1));
+            return *this;
+        }
+
+        // Appends a whole block; the block decides how through adder().
+        BlockSampler& operator<<(const BlockSampler& l) {
+            ConsoleContext<LOG_DEBUG> ctx("adding block");
+            l.adder(*this);
+            return *this;
+        }
+    };
+    
+    /**
+     * Block whose samplers are appended `num_loop` times in a row when the
+     * block is inserted into another BlockSampler.
+     */
+    class BlockLoop: public BlockSampler {
+    private:
+        int num_loop;
+    protected:
+        friend class BlockSampler;
+        // Copying is kept out of the public interface. The copy constructor
+        // only carries over the loop count (the sampler list starts empty),
+        // and assignment leaves the object untouched.
+        BlockLoop(const BlockLoop& l) : num_loop(l.num_loop) {}
+        BlockLoop& operator=(const BlockLoop& l) { return *this; }
+    public:
+        BlockLoop(int loop = 1) : num_loop(loop) {}
+
+        void setLoop(int loop) { num_loop = loop; }
+
+        // Appends the sampler list of this block to `s`, num_loop times.
+        virtual void adder(BlockSampler& s) const {
+            ConsoleContext<LOG_DEBUG> ctx("adder blockloop");
+            ctx.print(boost::format("num_loop = %d") % num_loop);
+            int remaining = num_loop;
+            while (remaining-- > 0)
+                s.mclist.insert(s.mclist.end(), mclist.begin(), mclist.end());
+        }
+
+        ~BlockLoop() {}
+    };
+    
+    // Top-level driver: owns the Markov state and runs the registered
+    // samplers (the list inherited from BlockSampler) over it.
+    class MainLoop: public BlockSampler {
+    protected:
+        MarkovState state;  // state handed to every sampler
+        int mcmc_id;        // index used in the name of the next snapshot file
+
+        void show_splash();
+    public:
+        MainLoop();
+        ~MainLoop();
+
+        // Calls init_markov() on every registered sampler.
+        void initialize();
+        // Calls restore_markov() on every sampler, then loads the state from
+        // `fname` (suffixed with "_<rank>" unless `flexible` is set).
+        void restore(const std::string& fname, bool flexible = false);
+        // Runs one pass over the sampler list.
+        void run();
+        // Writes the per-rank restart file "restart.h5_<rank>".
+        void save();
+        // Writes the per-rank crash dump "crash_file.h5_<rank>".
+        void save_crash();
+        // Writes the snapshot "mcmc_<mcmc_id>.h5" and increments mcmc_id.
+        void snap();
+
+        MarkovState& get_state() { return state; }
+        const MarkovState& get_state() const { return state; }
+    
+        // Sets the index used for the next snapshot.
+        void setStepID(int i) { mcmc_id = i; }
+    };
+
+}
+
+#endif
--- a/libLSS/samplers/core/markov.hpp
+++ b/libLSS/samplers/core/markov.hpp
@ -0,0 +1,48 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/markov.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_MARKOV_SAMPLER_HPP
+#define __LIBLSS_MARKOV_SAMPLER_HPP
+
+#include "libLSS/mcmc/global_state.hpp"
+
+namespace LibLSS {
+
+    /**
+     * Interface of a sampler acting on a MarkovState.
+     *
+     * Derived classes implement initialize(), restore() and sample().
+     * Set-up goes through init_markov() / restore_markov(), which make sure
+     * that only the first of these two calls has any effect.
+     */
+    class MarkovSampler {
+    private:
+        bool yet_init;  // true once either set-up entry point has run
+    protected:
+        virtual void initialize(MarkovState& state) = 0;
+        virtual void restore(MarkovState& state) = 0;
+    public:
+        MarkovSampler() : yet_init(false) {}
+        virtual ~MarkovSampler() {}
+
+        virtual void sample(MarkovState& state) = 0;
+
+        // Fresh start: runs initialize() unless the sampler is already set up.
+        void init_markov(MarkovState& state) {
+            if (yet_init)
+                return;
+            yet_init = true;
+            initialize(state);
+        }
+
+        // Restart: runs restore() unless the sampler is already set up.
+        void restore_markov(MarkovState& state) {
+            if (yet_init)
+                return;
+            yet_init = true;
+            restore(state);
+        }
+
+    };
+
+}
+
+#endif
--- a/libLSS/samplers/core/powerspec_tools.cpp
+++ b/libLSS/samplers/core/powerspec_tools.cpp
@ -0,0 +1,241 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/powerspec_tools.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "libLSS/tools/console.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+
+using namespace LibLSS;
+using boost::format;
+typedef boost::multi_array_types::extent_range range;
+
+
+// Releases the FFT manager created in restore_base(), if any.
+PowerSpectrumSampler_Base::~PowerSpectrumSampler_Base()
+{
+  // Deleting a null pointer is a no-op, so no explicit test is needed.
+  delete mgr;
+}
+
+// Common set-up of the power spectrum samplers.
+//
+// Reads the box and grid description from `state`, creates the FFT manager,
+// and either fetches the power-spectrum arrays already stored in `state` or
+// allocates and registers them.
+//
+// Returns true when the arrays were freshly allocated (the mode keys still
+// have to be built by the caller), false when they already existed.
+bool PowerSpectrumSampler_Base::restore_base(MarkovState& state)
+{
+    Console& cons = Console::instance();
+    ConsoleContext<LOG_INFO> ctx("power spectrum sampler (common)");
+    bool build_keys;
+
+    // Box side lengths.
+    L0 = *state.get<SDouble>("L0");
+    L1 = *state.get<SDouble>("L1");
+    L2 = *state.get<SDouble>("L2");
+
+    // Grid dimensions; N2_HC is the last dimension of the Fourier-space arrays.
+    N0 = *state.get<SLong>("N0");
+    N1 = *state.get<SLong>("N1");
+    N2 = *state.get<SLong>("N2");
+    N2_HC = *state.get<SLong>("N2_HC");
+
+    // Creates a manager. Then we get access to all derived quantities
+    // for parallelism.
+    mgr = new FFTMgr(N0, N1, N2, comm);
+
+
+    Ntot = N0*N1*N2;
+    volNorm = L0*L1*L2/Ntot;
+    volume = L0*L1*L2;
+
+    ctx.print(format("Power spectrum (%dx%dx%d), box (%gx%gx%g)") % N0 % N1 % N2 % L0 % L1 % L2);
+
+    // Binning of the power spectrum.
+    N_k = *state.get<SLong>("NUM_MODES");
+    kmin = *state.get<SDouble>("K_MIN");
+    kmax = *state.get<SDouble>("K_MAX");
+
+    // Slab owned by this rank, as reported by the FFT manager.
+    startN0 = mgr->startN0;
+    localN0 = mgr->localN0;
+    fourierLocalSize = mgr->allocator_real.minAllocSize;
+
+    ctx.print(format("Num modes = %d, kmin = %lg, kmax = %lg") % N_k % kmin % kmax);
+
+    rgen = state.get<RandomGen>("random_generator");
+
+    if (state.exists("powerspectrum")) {
+        // The arrays are already registered in the state: reuse them.
+        k = state.get<ArrayType1d>("k_modes");
+        keys = state.get<IArrayType>("k_keys");
+        nmode = state.get<IArrayType1d>("k_nmodes");
+        key_counts = state.get<IArrayType1d>("key_counts");
+        P_sync += (P = state.get<ArrayType1d>("powerspectrum"));
+        adjustMul = state.get<IArrayType>("adjust_mode_multiplier");
+
+        build_keys = false;
+    } else {
+        // First use: allocate every array and register it in the state.
+        cons.print<LOG_DEBUG>("Allocating power spectrum array");
+        P = new ArrayType1d(boost::extents[N_k]);
+        cons.print<LOG_DEBUG>("Allocating number of stacked modes array");
+        nmode = new IArrayType1d(boost::extents[N_k]);
+        cons.print<LOG_DEBUG>("Allocating key counts array");
+        key_counts = new IArrayType1d(boost::extents[N_k]);
+        cons.print<LOG_DEBUG>("Allocating mode list");
+        k = new ArrayType1d(boost::extents[N_k]);
+
+        cons.print<LOG_DEBUG>("Allocating mode keys array");
+        keys = new IArrayType(mgr->extents_complex());
+        keys->setRealDims(ArrayDimension(N0, N1, N2_HC));
+        cons.print<LOG_DEBUG>("Mode multiplier adjustment");
+        adjustMul = new IArrayType(mgr->extents_complex());
+        adjustMul->setRealDims(ArrayDimension(N0, N1, N2_HC));
+
+        state.newElement("k_modes", k);
+        state.newElement("k_keys", keys);
+        state.newElement("k_nmodes", nmode);
+        state.newElement("key_counts", key_counts);
+        P_sync += state.newElement("powerspectrum", P, true);
+        state.newElement("adjust_mode_multiplier", adjustMul);
+
+        build_keys = true;
+    }
+
+    {
+        ArrayType1d::ArrayType& P_array = *P->array;
+
+        // Every bin is set to 1e6 here, in both branches above.
+        // NOTE(review): presumably a placeholder that is overwritten later
+        // (by initialize_base or by loading the state from file) -- confirm.
+        for (long i = 0; i < N_k; i++)
+            P_array[i] = 1e6;
+    }
+
+    // Element counts of the full and of the rank-local Fourier grid.
+    N_fourier_elements = N0*N1*N2_HC;
+    local_fourier_elements = localN0*N1*N2_HC;
+    ctx.print(boost::format("N0 = %d, N1 = %d, N2 = %d, N2_HC=%d, localN0=%d, startN0=%d") % N0 % N1 % N2 % N2_HC % localN0 % startN0);
+
+
+
+    return build_keys;
+}
+
+
+// Fresh-start set-up: runs restore_base() and, when the arrays were newly
+// allocated, fills the mode list, the initial spectrum and the mode keys.
+void PowerSpectrumSampler_Base::initialize_base(MarkovState& state)
+{
+    Console& cons = Console::instance();
+
+    // restore_base() returns false when the arrays were found in the state.
+    const bool build_keys = restore_base(state);
+    if (!build_keys) {
+        cons.print<LOG_INFO>("Keys already built. Returning.");
+        return;
+    }
+
+    // Regularly spaced mode list and a flat initial spectrum.
+    ArrayType1d::ArrayType& k_array = *k->array;
+    ArrayType1d::ArrayType& P_array = *P->array;
+    for (long bin = 0; bin < N_k; bin++) {
+        k_array[bin] = (kmax-kmin)/N_k * double(bin);
+        P_array[bin] = 1e-6;
+    }
+
+    // Build the powerspectrum keys
+    cons.print<LOG_INFO>("Building keys");
+    boost::array<double, 3> L = { L0, L1, L2 };
+
+    init_helpers::initialize_powerspectrum_keys(
+        *mgr, *keys->array, *key_counts->array, *adjustMul->array,
+        *nmode->array, L, kmin, kmax, N_k);
+}
+
+
+
+
+// Frees the scratch buffers and FFT plans created by initialize_coloring().
+PowerSpectrumSampler_Coloring::~PowerSpectrumSampler_Coloring()
+{
+  // A null tmp_fourier means initialize_coloring() never ran.
+  if (tmp_fourier == 0)
+    return;
+
+  MFCalls::free(tmp_fourier);
+  MFCalls::free(tmp_real);
+  MFCalls::destroy_plan(analysis_plan);
+  MFCalls::destroy_plan(synthesis_plan);
+}
+
+// Allocates the scratch buffers, creates the forward (real to complex) and
+// backward (complex to real) FFT plans and sizes the sqrt(P) table.
+// Always returns true.
+bool PowerSpectrumSampler_Coloring::initialize_coloring(MarkovState& state)
+{
+  ConsoleContext<LOG_INFO> ctx("coloring initialization");
+
+  // The real buffer holds twice as many elements as the complex one.
+  tmp_fourier = MFCalls::alloc_complex(fourierLocalSize);
+  tmp_real = MFCalls::alloc_real(fourierLocalSize*2);
+
+  ctx.print("Creating MPI/FFTW plans for Messenger-Signal");
+
+  // The communicator argument only exists in the MPI build.
+  analysis_plan = MFCalls::plan_dft_r2c_3d(
+      N0, N1, N2, tmp_real, tmp_fourier,
+#ifdef ARES_MPI_FFTW
+      comm->comm(),
+#endif
+      FFTW_DESTROY_INPUT|FFTW_MEASURE);
+
+  synthesis_plan = MFCalls::plan_dft_c2r_3d(
+      N0, N1, N2, tmp_fourier, tmp_real,
+#ifdef ARES_MPI_FFTW
+      comm->comm(),
+#endif
+      FFTW_DESTROY_INPUT|FFTW_MEASURE);
+
+  // One sqrt(P) entry per power spectrum bin.
+  sqrt_P_info.array->resize(boost::extents[P->array->num_elements()]);
+  return true;
+}
+
+// Restart path: the coloring set-up is the same as for a fresh start.
+bool PowerSpectrumSampler_Coloring::restore_coloring(MarkovState& state)
+{
+  const bool ready = initialize_coloring(state);
+  return ready;
+}
+
+// Recomputes "s_field" from "x_field": forward FFT of x, multiplication of
+// each Fourier mode by sqrt(powerSpec[bin] * volume) / Ntot, where the bin is
+// read from the mode keys, then inverse FFT into s.
+//
+// powerSpec must have as many bins as the sampler's own spectrum.
+void PowerSpectrumSampler_Coloring::update_s_field_from_x(MarkovState& state, const ArrayType1d& powerSpec)
+{
+  ConsoleContext<LOG_DEBUG> ctx("update of s_field from x_field");
+  ArrayType& x_field = *state.get<ArrayType>("x_field");
+  ArrayType& s_field = *state.get<ArrayType>("s_field");
+
+  ctx.print(format("%p") % P);
+  Console::instance().c_assert(powerSpec.array->num_elements() == P->array->size(), "coloring works only on similar powerspectrum as the system one");
+
+  // Overwrite s
+  ctx.print("Copying x_field");
+  copy_padded_data(*x_field.array, tmp_real);
+  ctx.print("Analyzing");
+  MFCalls::execute_r2c(analysis_plan, tmp_real, tmp_fourier);
+
+  // Per-bin multiplier, computed once before the mode loop.
+  const long numBins = powerSpec.array->num_elements();
+  for (long bin = 0; bin < numBins; bin++) {
+    (*sqrt_P_info.array)[bin] = std::sqrt((*powerSpec.array)[bin] * volume) / (Ntot);
+  }
+
+  int *flat_keys = keys->array->data();
+
+  ctx.print("Coloring");
+#pragma omp parallel for schedule(static)
+  for (long mode = 0; mode < local_fourier_elements; mode++) {
+    const double scale = (*sqrt_P_info.array)[flat_keys[mode]];
+
+    // Real and imaginary parts get the same factor.
+    tmp_fourier[mode][0] *= scale;
+    tmp_fourier[mode][1] *= scale;
+  }
+
+  ctx.print("Synthesis");
+  MFCalls::execute_c2r(synthesis_plan, tmp_fourier, tmp_real);
+  copy_unpadded_data(tmp_real, *s_field.array);
+}
+
+// Convenience overload: colors with the sampler's own power spectrum.
+void PowerSpectrumSampler_Coloring::update_s_field_from_x(MarkovState& state)
+{
+  const ArrayType1d& own_spectrum = *P;
+  update_s_field_from_x(state, own_spectrum);
+}
--- a/libLSS/samplers/core/powerspec_tools.hpp
+++ b/libLSS/samplers/core/powerspec_tools.hpp
@ -0,0 +1,206 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/powerspec_tools.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_POWER_SPECTRUM_TOOLS_HPP
+#define __LIBLSS_POWER_SPECTRUM_TOOLS_HPP
+
+#include "libLSS/samplers/core/markov.hpp"
+#include "libLSS/mcmc/state_sync.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/tools/array_tools.hpp"
+
+namespace LibLSS {
+
+  // Euclidean norm of a 1d container providing size(), operator[] and
+  // value_type.
+  template <typename ArrayType>
+  typename ArrayType::value_type norm_v(const ArrayType &x) {
+    typedef typename ArrayType::value_type value_t;
+    const size_t count = x.size();
+    value_t sum_sq = 0;
+
+    for (size_t idx = 0; idx < count; idx++) {
+      sum_sq += x[idx] * x[idx];
+    }
+
+    return std::sqrt(sum_sq);
+  }
+
+  // Signed frequency index of grid position i on a grid of size N:
+  // positions above N/2 wrap around to negative values.
+  static inline int ifftfreq(int i, int N) {
+    if (i > N / 2)
+      return i - N;
+    return i;
+  }
+
+  // Wave number of grid position i, for a grid of N cells and side length L.
+  template <typename T>
+  T kmode(int i, int N, T L) {
+    const int freq = ifftfreq(i, N);
+    return 2 * M_PI / L * freq;
+  }
+
+  // Index of the power spectrum bin that holds the Fourier mode `ik`.
+  //
+  // N, ik: grid sizes and mode indices along the three axes.
+  // L:     box side lengths.
+  // kmin, dk, Nbin: lower edge, width and number of bins.
+  //
+  // The zero mode always maps to bin 0; any other mode maps to
+  // 1 + floor((|k| - kmin) / dk), which is asserted to lie in [0, Nbin).
+  template <typename IKArray, typename LArray>
+  int power_key(
+      const IKArray &N, const IKArray &ik, const LArray &L, double kmin,
+      double dk, int Nbin) {
+    // 0 mode is specific
+    const bool zero_mode = (ik[0] == 0 && ik[1] == 0 && ik[2] == 0);
+    if (zero_mode)
+      return 0;
+
+    // Wave vector components along each axis.
+    boost::array<double, 3> kvec;
+    for (int axis = 0; axis < 3; axis++)
+      kvec[axis] = kmode(ik[axis], N[axis], L[axis]);
+
+    const double kmod = norm_v(kvec); /// units k [h/Mpc]
+
+    const int bin = 1 + int(std::floor((kmod - kmin) / dk));
+
+    Console::instance().c_assert(
+        (bin >= 0) && (bin < Nbin), "Over/Underflow binning in powerspectrum");
+    return bin;
+  }
+
+  namespace init_helpers {
+
+    /**
+     * Assigns a power spectrum bin to every Fourier mode of the local slab
+     * and counts the modes per bin.
+     *
+     * @param manager          FFT manager; provides N0, N1, N2, N2_HC,
+     *                         startN0, localN0 and the communicator.
+     * @param array_key        output, bin index of each mode.
+     * @param array_key_counts output, number of grid modes per bin, summed
+     *                         over all ranks (bin 0 is reset to 0 locally
+     *                         before the reduction).
+     * @param adjust           output, per-mode multiplier: 2 by default,
+     *                         1 or 0 for the special modes handled below.
+     * @param nmode_array      output, sum of the multipliers per bin, summed
+     *                         over all ranks.
+     * @param L                box side lengths.
+     * @param kmin, kmax, N_k  binning: range and number of bins.
+     */
+    template <
+        typename Manager, typename ArrayKey, typename ArrayKeyCounts,
+        typename ArrayAdjust, typename ArrayNmode>
+    void initialize_powerspectrum_keys(
+        Manager &manager, ArrayKey &array_key, ArrayKeyCounts &array_key_counts,
+        ArrayAdjust &adjust, ArrayNmode &nmode_array,
+        boost::array<double, 3> const &L, double kmin, double kmax,
+        size_t N_k) {
+      using boost::format;
+      Console &cons = Console::instance();
+      size_t N0 = manager.N0;
+      size_t startN0 = manager.startN0;
+      size_t localN0 = manager.localN0;
+      size_t N1 = manager.N1;
+      size_t N2_HC = manager.N2_HC;
+
+      // FIX: Manager sizes should size_t.
+      boost::array<size_t, 3> iN{N0, N1, size_t(manager.N2)};
+
+      // The bin width does not depend on the mode: compute it once.
+      double const dk = (kmax - kmin) / N_k;
+
+      array::fill(nmode_array, 0);
+      array::fill(array_key_counts, 0);
+
+      for (size_t ikx = startN0; ikx < startN0 + localN0; ikx++) {
+        for (size_t iky = 0; iky < N1; iky++) {
+          for (size_t ikz = 0; ikz < N2_HC; ikz++) {
+            boost::array<size_t, 3> ik{ikx, iky, ikz};
+            int p_key = power_key(iN, ik, L, kmin, dk, N_k);
+
+            // Check the bin before it is used as an index.
+            assert(p_key >= 0 && size_t(p_key) < N_k);
+            array_key_counts[p_key]++;
+            array_key[ikx][iky][ikz] = p_key;
+            nmode_array[p_key] +=
+                2; // Put everybody at 2. There will be a fix after the loop.
+            adjust[ikx][iky][ikz] = 2;
+          }
+        }
+      }
+
+      // Only one mode and it is not sampling.
+      array_key_counts[0] = 0;
+
+      // Special modes of the ikx = 0 plane, on the rank that owns it.
+      if (startN0 == 0 && localN0 > 0) {
+        adjust[0][0][0] = 0;
+        adjust[0][N1 / 2][0] = 1;
+        adjust[0][0][N2_HC - 1] = 1;
+        adjust[0][N1 / 2][N2_HC - 1] = 1;
+
+        nmode_array[array_key[0][0][0]] -= 2; // No mode for k=0
+        nmode_array[array_key[0][N1 / 2][0]] -= 1;
+        nmode_array[array_key[0][0][N2_HC - 1]] -= 1;
+        nmode_array[array_key[0][N1 / 2][N2_HC - 1]] -= 1;
+      }
+
+      // Special modes of the ikx = N0/2 plane, on the rank that owns it.
+      if (startN0 <= N0 / 2 && localN0 + startN0 > N0 / 2) {
+        adjust[N0 / 2][0][0] = 1;
+        adjust[N0 / 2][N1 / 2][0] = 1;
+        adjust[N0 / 2][0][N2_HC - 1] = 1;
+        adjust[N0 / 2][N1 / 2][N2_HC - 1] = 1;
+
+        nmode_array[array_key[N0 / 2][0][0]] -=
+            1; // Hermiticity removes one free mode
+        nmode_array[array_key[N0 / 2][N1 / 2][0]] -= 1;
+        nmode_array[array_key[N0 / 2][0][N2_HC - 1]] -= 1;
+        nmode_array[array_key[N0 / 2][N1 / 2][N2_HC - 1]] -= 1;
+      }
+
+      // Sum the per-rank counts so that every rank holds the global totals.
+      cons.template print<LOG_DEBUG>(
+          format("Reducing mode counting: num_elements=%d") %
+          nmode_array.num_elements());
+      manager.getComm()->all_reduce_t(
+          MPI_IN_PLACE, nmode_array.data(), nmode_array.num_elements(),
+          MPI_SUM);
+      cons.template print<LOG_DEBUG>(
+          format("Reducing key counting: num_elements=%d") %
+          array_key_counts.num_elements());
+      manager.getComm()->all_reduce_t(
+          MPI_IN_PLACE, array_key_counts.data(),
+          array_key_counts.num_elements(), MPI_SUM);
+    }
+
+  } // namespace init_helpers
+
+  /**
+   * Common state of the power spectrum samplers: grid and box description,
+   * binning, FFT manager and the arrays shared through the MarkovState.
+   *
+   * The array pointers are obtained from, or registered in, the MarkovState
+   * by restore_base(); this class only deletes the FFT manager.
+   */
+  class PowerSpectrumSampler_Base : public MarkovSampler {
+  protected:
+    typedef FFTW_Manager_3d<double> FFTMgr;
+
+    long N0, N1, N2, N2_HC;                          // grid sizes
+    long fourierLocalSize;                           // allocation size reported by the FFT manager
+    long startN0, localN0;                           // slab owned by this rank
+    long N_fourier_elements, local_fourier_elements; // N0*N1*N2_HC and localN0*N1*N2_HC
+    long Ntot;                                       // N0*N1*N2
+    int N_k;                                         // number of power spectrum bins
+    double kmin, kmax;                               // binning range
+    double volNorm, volume;                          // volume/Ntot and L0*L1*L2
+    double L0, L1, L2;                               // box side lengths
+
+    FFTMgr *mgr;
+
+    IArrayType *keys, *adjustMul;
+    IArrayType1d *key_counts, *nmode;
+    ArrayType1d *P, *k;
+    RandomGen *rgen;
+    MPI_SyncBundle P_sync;
+    MPI_Communication *comm;
+
+  public:
+    // Every pointer member starts out null, adjustMul included.
+    PowerSpectrumSampler_Base(MPI_Communication *lcomm)
+        : mgr(0), keys(0), adjustMul(0), key_counts(0), nmode(0), P(0), k(0),
+          rgen(0), comm(lcomm) {}
+    virtual ~PowerSpectrumSampler_Base();
+
+    // Returns true when the arrays were freshly allocated, in which case the
+    // mode keys still have to be built.
+    bool restore_base(MarkovState &state);
+    // Calls restore_base() and builds the mode keys when needed.
+    void initialize_base(MarkovState &state);
+  };
+
+  /**
+   * Power spectrum sampler base that can "color" a field: it recomputes
+   * s_field from x_field by scaling each Fourier mode with the square root
+   * of the power spectrum.
+   */
+  class PowerSpectrumSampler_Coloring : public PowerSpectrumSampler_Base {
+  protected:
+    MFCalls::plan_type analysis_plan, synthesis_plan; // forward / backward FFT plans
+    MFCalls::complex_type *tmp_fourier;               // Fourier-space scratch buffer
+    MFCalls::real_type *tmp_real;                     // real-space scratch buffer
+    ArrayType1d sqrt_P_info;                          // per-bin multiplier used while coloring
+
+    int Ncatalog; // not used in this class; presumably set by derived samplers -- TODO confirm
+
+  public:
+    // All members start in a defined state; buffers and plans are created
+    // later by initialize_coloring().
+    PowerSpectrumSampler_Coloring(MPI_Communication *comm)
+        : PowerSpectrumSampler_Base(comm), analysis_plan(), synthesis_plan(),
+          tmp_fourier(0), tmp_real(0), sqrt_P_info(boost::extents[0]),
+          Ncatalog(0) {}
+    virtual ~PowerSpectrumSampler_Coloring();
+
+    // Allocates buffers and plans; restore_coloring() does the same.
+    bool initialize_coloring(MarkovState &state);
+    bool restore_coloring(MarkovState &state);
+
+    // Recomputes s_field from x_field, with the sampler's own spectrum or
+    // with the given one.
+    void update_s_field_from_x(MarkovState &state);
+    void
+    update_s_field_from_x(MarkovState &state, const ArrayType1d &powerSpectrum);
+  };
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/samplers/core/ran_gig.cc
+++ b/libLSS/samplers/core/ran_gig.cc
@ -0,0 +1,214 @@
+#include <math.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>         
+#include <cassert>
+#include <cfloat>
+#include <float.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include "libLSS/samplers/core/ran_gig.h"
+
+#define EPSILON 1e-10
+
+using namespace std;
+
+/* R_zeroin2() is faster for "expensive" f(), in those typical cases where
+ *             f(ax) and f(bx) are available anyway : */
+
+/* Root finder combining bisection with linear / inverse quadratic
+ * interpolation, for callers that already know f(ax) and f(bx).
+ *
+ * On return *Tol holds the achieved precision |c-b| and *Maxit the number of
+ * iterations used, or -1 if the iteration limit was reached. Both are
+ * therefore in/out arguments and must be reset before every call.
+ */
+double R_zeroin2(			/* An estimate of the root */
+    double ax,				/* Left border | of the range	*/
+    double bx,				/* Right border| the root is sought*/
+    double fa, double fb,		/* f(a), f(b) */
+    double (*f)(double x, void *info),	/* Function under investigation	*/
+    void *info,				/* Add'l info passed on to f	*/
+    double *Tol,			/* Acceptable tolerance		*/
+    int *Maxit)				/* Max # of iterations */
+{
+    double a,b,c, fc;			/* Abscissae, descr. see above,  f(c) */
+    double tol;
+    int maxit;
+
+    a = ax;  b = bx;
+    c = a;   fc = fa;
+    maxit = *Maxit + 1; tol = * Tol;
+
+    /* First test if we have found a root at an endpoint */
+    if(fa == 0.0) {
+	*Tol = 0.0;
+	*Maxit = 0;
+	return a;
+    }
+    if(fb ==  0.0) {
+	*Tol = 0.0;
+	*Maxit = 0;
+	return b;
+    }
+
+    while(maxit--)		/* Main iteration loop	*/
+    {
+	double prev_step = b-a;		/* Distance from the last but one
+					   to the last approximation	*/
+	double tol_act;			/* Actual tolerance		*/
+	double p;			/* Interpolation step is calcu- */
+	double q;			/* lated in the form p/q; divi-
+					 * sion operations is delayed
+					 * until the last moment	*/
+	double new_step;		/* Step at this iteration	*/
+
+	if( fabs(fc) < fabs(fb) )
+	{				/* Swap data for b to be the	*/
+	    a = b;  b = c;  c = a;	/* best approximation		*/
+	    fa=fb;  fb=fc;  fc=fa;
+	}
+	tol_act = 2.*EPSILON*fabs(b) + tol/2.;
+	new_step = (c-b)/2.;
+
+	if( fabs(new_step) <= tol_act || fb == (double)0 )
+	{
+	    *Maxit -= maxit;
+	    *Tol = fabs(c-b);
+	    return b;			/* Acceptable approx. is found	*/
+	}
+
+	/* Decide if the interpolation can be tried	*/
+	if( fabs(prev_step) >= tol_act	/* If prev_step was large enough*/
+	    && fabs(fa) > fabs(fb) ) {	/* and was in true direction,
+					 * Interpolation may be tried	*/
+	    /* `register` was dropped: the specifier no longer exists in C++17 */
+	    double t1,cb,t2;
+	    cb = c-b;
+	    if( a==c ) {		/* If we have only two distinct	*/
+					/* points linear interpolation	*/
+		t1 = fb/fa;		/* can only be applied		*/
+		p = cb*t1;
+		q = 1.0 - t1;
+	    }
+	    else {			/* Quadric inverse interpolation*/
+
+		q = fa/fc;  t1 = fb/fc;	 t2 = fb/fa;
+		p = t2 * ( cb*q*(q-t1) - (b-a)*(t1-1.0) );
+		q = (q-1.0) * (t1-1.0) * (t2-1.0);
+	    }
+	    if( p>(double)0 )		/* p was calculated with the */
+		q = -q;			/* opposite sign; make p positive */
+	    else			/* and assign possible minus to	*/
+		p = -p;			/* q				*/
+
+	    if( p < (0.75*cb*q-fabs(tol_act*q)/2.) /* If b+p/q falls in [b,c]*/
+		&& p < fabs(prev_step*q/2.) )	/* and isn't too large	*/
+		new_step = p/q;			/* it is accepted
+						 * If p/q is too large then the
+						 * bisection procedure can
+						 * reduce [b,c] range to more
+						 * extent */
+	}
+
+	if( fabs(new_step) < tol_act) {	/* Adjust the step to be not less*/
+	    if( new_step > (double)0 )	/* than tolerance		*/
+		new_step = tol_act;
+	    else
+		new_step = -tol_act;
+	}
+	a = b;	fa = fb;			/* Save the previous approx. */
+	b += new_step;	fb = (*f)(b, info);	/* Do step to a new approxim. */
+	if( (fb > 0. && fc > 0.) || (fb < 0. && fc < 0.) ) {
+	    /* Adjust c for it to have a sign opposite to that of b */
+	    c = a;  fc = fa;
+	}
+
+    }
+    /* failed! */
+    *Tol = fabs(c-b);
+    *Maxit = -1;
+    return b;
+}
+
+
+/* Convenience front end of R_zeroin2(): evaluates f at both borders of the
+ * range and hands everything over. *Tol and *Maxit are in/out arguments,
+ * exactly as for R_zeroin2().
+ */
+double R_zeroin(			/* An estimate of the root */
+    double ax,				/* Left border | of the range	*/
+    double bx,				/* Right border| the root is sought*/
+    double (*f)(double x, void *info),	/* Function under investigation	*/
+    void *info,				/* Add'l info passed on to f	*/
+    double *Tol,			/* Acceptable tolerance		*/
+    int *Maxit)				/* Max # of iterations */
+{
+    const double f_left = (*f)(ax, info);
+    const double f_right = (*f)(bx, info);
+
+    return R_zeroin2(ax, bx, f_left, f_right, f, info, Tol, Maxit);
+}
+
+
+
+// Cubic polynomial in y whose roots are searched for by ran_gig().
+// `params` points to three doubles: beta, lambda and m, in that order.
+double g(double y, void *params)
+{
+  const double *coeff = static_cast<const double *>(params);
+  const double beta = coeff[0];
+  const double lambda = coeff[1];
+  const double m = coeff[2];
+
+  return(0.5*beta*y*y*y - y*y*(0.5*beta*m+lambda+1) + y*((lambda-1)*m-0.5*beta) + 0.5*beta*m);
+}
+
+// Generates one random observation from a generalized inverse Gaussian
+// distribution with parameters (chi, psi, lambda). The algorithm is based on
+// that given by Dagpunar (1989).
+//
+// SEED is the GSL generator supplying the uniform deviates.
+//
+// Invalid or unsupported parameter combinations print a message on stdout
+// and return 0.
+double LibLSS::ran_gig(double chi, double psi, double lambda,gsl_rng * SEED)
+{
+  if(chi<0.) {cout << "chi can not be negative"<<endl; return 0.;}
+  if(psi<0.) {cout << "psi can not be negative"<<endl; return 0.;}
+
+  if((lambda>=0.)&&(psi==0.))
+  {
+    cout << "When lambda >= 0, psi must be > 0"<<endl;
+    return 0.;
+  }
+
+  // Logical (not bitwise) AND, like the test just above.
+  if((lambda<=0.)&&(chi==0.))
+  {
+    cout <<"When lambda <= 0, chi must be > 0"<<endl;
+    return 0.;
+  }
+
+  if(chi==0.) {cout <<"chi = 0, use rgamma"<<endl; return 0.;}
+  if(psi==0.) {cout <<"algorithm only valid for psi > 0"<<endl; return 0.;}
+
+  double alpha=sqrt(psi/chi);
+  double beta=sqrt(psi*chi);
+
+  double m=(lambda-1.+sqrt((lambda-1.)*(lambda-1.)+beta*beta))/beta;
+
+  double upper = m;
+
+  double params[3];
+  params[0]=beta;
+  params[1]=lambda;
+  params[2]=m;
+
+  // Double the upper bracket until g becomes positive.
+  while(g(upper,params)<=0.) upper = 2.*upper;
+
+  const double tol0=1e-10;
+  const int maxit0=10000;
+
+  // R_zeroin overwrites its tolerance and iteration arguments with the
+  // achieved precision and the number of iterations used. Each root search
+  // therefore gets its own freshly initialized pair; sharing them would cap
+  // the second search at the iteration count of the first.
+  double tolM=tol0;
+  int maxitM=maxit0;
+  double yM =R_zeroin(0.,m,&g, params,&tolM,&maxitM);// uniroot(g,interval=c(0,m))$root
+
+  double tolP=tol0;
+  int maxitP=maxit0;
+  double yP =R_zeroin(m,upper,&g, params,&tolP,&maxitP);// uniroot(g,interval=c(m,upper))$root
+
+  double a = (yP-m)*exp(-0.25*beta*(yP+1./yP-m-1./m)+(log(yP) -log(m))*(0.5*(lambda-1.)) );
+  double b = (yM-m)*exp(-0.25*beta*(yM+1./yM-m-1./m)+(log(yM) -log(m))*(0.5*(lambda-1.)) );
+  double c = -0.25*beta*(m+1./m) + 0.5*(lambda-1.)*log(m);
+
+  double y=0;
+
+  // Draw candidates until one is positive and passes the acceptance test.
+  while(true){
+      double R1 = gsl_rng_uniform (SEED);
+      double R2 = gsl_rng_uniform (SEED);
+      y= m + a*R2/R1 + b*(1.-R2)/R1;
+      if((y>0.) && (-log(R1)>=-0.5*(lambda-1.)*log(y)+0.25*beta*(y+1./y)+c)) break;
+  }
+
+  return(y/alpha);
+}
--- a/libLSS/samplers/core/ran_gig.h
+++ b/libLSS/samplers/core/ran_gig.h
@ -0,0 +1,19 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/ran_gig.h
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_RANGIG_H
+#define __LIBLSS_RANGIG_H
+
+namespace LibLSS {
+
+double ran_gig(double chi, double psi, double lambda,gsl_rng * SEED);
+
+}
+
+#endif
--- a/libLSS/samplers/core/random_number.hpp
+++ b/libLSS/samplers/core/random_number.hpp
@ -0,0 +1,392 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/random_number.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __RANDOM_NUMBER_HPP
+#define __RANDOM_NUMBER_HPP
+
+#include <cmath>
+#include <boost/format.hpp>
+#include <functional>
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/tools/openmp.hpp"
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include <iostream>
+#include "libLSS/tools/array_concepts.hpp"
+#include "libLSS/tools/fusewrapper.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/hdf5_type.hpp"
+
+namespace LibLSS {
+
+    class RandomNumber;
+    namespace Random_details {
+      // This is a workaround for a bug in GCC compiler which
+      // has a problem using boost::bind on member function in declspec.
+      // Each helper simply forwards to the matching RandomNumber method.
+      inline unsigned int real_poisson(RandomNumber *rgen, double nmean);
+      inline double real_gaussian(RandomNumber *rgen, double dev);
+      // Gamma deviates are continuous: the helper must return a double
+      // (returning an unsigned int truncated every sample).
+      inline double real_gamma(RandomNumber *rgen, double a, double b);
+      inline unsigned int real_negative_binomial(RandomNumber *rgen, double p, double n);
+    }
+
+    /**
+     * Fundamental class to provide random number generation.
+     * Concrete generators implement the pure virtual scalar methods; the
+     * templated overloads build lazy array expressions on top of them.
+     */ 
+    class RandomNumber
+    {
+    public:
+        virtual ~RandomNumber() {}
+        /**
+         * This returns a random 32-bit integer, uniformly distributed.
+         * @return a random integer
+         */ 
+        virtual unsigned long int get() = 0;
+        /**
+         * This returns a uniformly distributed double precision floating point.
+         * @return a random floating point.
+         */
+        virtual double uniform() = 0;
+
+        /**
+         * Provide a seed to initialize the Pseudo Random Number Generator.
+         * @param s a seed value
+         */
+        virtual void seed(unsigned long int s) = 0;
+        /**
+         * Save the internal state of the PRNG into the provided HDF5 group.
+         * @param s an HDF5 group.
+         */
+        virtual void save(H5_CommonFileGroup& s) = 0;
+        /**
+         * Restore the internal state of the PRNG from the provided HDF5 group.
+         * @param s        an HDF5 group.
+         * @param flexible specify if we accept some inconsistency in the input group (i.e. different thread count).
+         */
+        virtual void restore(H5_CommonFileGroup& s, bool flexible = false) = 0;
+
+        /**
+         * Generate a single Gaussian deviate. The implementation is not part
+         * of this class body (presumably provided by gaussian_ratio.tcc).
+         */
+        double gaussian_ratio();
+
+        /**
+         * Generate a Poisson distributed random integer with the specified intensity.
+         * @param mean Poisson intensity 
+         */ 
+        virtual unsigned int poisson(double mean) = 0;
+        /**
+         * Generate a negative binomial distributed random integer.
+         * @param p first parameter, forwarded to the concrete generator.
+         * @param n second parameter, forwarded to the concrete generator.
+         */
+        virtual unsigned int negative_binomial(double p, double n) = 0;
+
+        /**
+         * Generate numbers that are uniformly distributed. The actual value in wrap is ignored. It is only
+         * used to determine the rank of the required array.
+         * @param wrap a wrapped expression to provide the rank of the output array. 
+         */ 
+        template<typename Array, bool copy>
+        void uniform(LibLSS::FuseWrapper_detail::Wrapper<Array,copy> wrap) {
+          wrap = b_va_fused<double,Array::dimensionality>([this](int, ...)->double { return this->uniform(); });
+        }
+
+        /**
+         * Build an expression that generates gamma distributed random numbers,
+         * the two parameters being taken element-wise from the two wrapped
+         * expressions. The elements of the result are double precision values.
+         * @param wrap  the expression providing the first parameter (a).
+         * @param wrap2 the expression providing the second parameter (b).
+         */
+        // Only activate this overload if the type looks like an array
+        template<typename Array, bool copy, typename Array2, bool copy2>
+        auto gamma(LibLSS::FuseWrapper_detail::Wrapper<Array,copy> wrap, LibLSS::FuseWrapper_detail::Wrapper<Array2,copy2> wrap2)
+        {
+          return LibLSS::fwrap(
+            LibLSS::b_va_fused<double>(
+              std::bind(&LibLSS::Random_details::real_gamma,
+                          this, std::placeholders::_1, std::placeholders::_2),
+              wrap.forward_wrap(),
+              wrap2.forward_wrap()
+            )
+          );
+        }
+
+        /**
+         * Build an expression that generate random number that are distributed as a Negative Binomial
+         * with an intensity as provided by the wrap expression, and an additional miss term with the second
+         * expression.
+         * @param wrap the expression providing the Poisson intensity.
+         * @param wrap2 the expression providing the missed intensity.
+         */
+        template<typename Array, bool copy, typename Array2, bool copy2>
+        auto negative_binomial(LibLSS::FuseWrapper_detail::Wrapper<Array,copy> wrap, LibLSS::FuseWrapper_detail::Wrapper<Array2,copy2> wrap2)
+        {
+          return LibLSS::fwrap(
+            LibLSS::b_va_fused<unsigned int>(
+              std::bind(&LibLSS::Random_details::real_negative_binomial,
+                          this, std::placeholders::_1, std::placeholders::_2),
+              wrap.forward_wrap(),
+              wrap2.forward_wrap()
+            )
+          );
+        }
+
+
+
+        /**
+         * Build an expression that generate random number that are Poisson distributed
+         * with an intensity as provided by the wrap expression.
+         * @param wrap the expression providing the Poisson intensity.
+         */
+        template<typename Array, bool copy>
+        auto poisson(LibLSS::FuseWrapper_detail::Wrapper<Array,copy> wrap)
+        {
+          return LibLSS::fwrap(
+            LibLSS::b_va_fused<unsigned int>(
+              std::bind(&LibLSS::Random_details::real_poisson,
+                          this, std::placeholders::_1),
+              wrap.forward_wrap()
+            )
+          );
+        }
+
+        /**
+         * Build an expression that generate gaussian random number whose standard deviation is determined
+         * by the input wrapped expression. The mean is set to zero.
+         * @param wrap the expression providing the standard deviation.
+         */
+        template<typename Array, bool copy>
+        auto gaussian(LibLSS::FuseWrapper_detail::Wrapper<Array,copy> wrap)
+        {
+          return LibLSS::fwrap(
+            LibLSS::b_va_fused<typename Array::element>(
+              std::bind(&LibLSS::Random_details::real_gaussian,
+                          this, std::placeholders::_1),
+              wrap.forward_wrap()
+            )
+          );
+        }
+
+
+        /**
+         * Return a single random number gaussian distributed 
+         */
+        double gaussian() { return gaussian_ratio(); }
+        /**
+         * Return a single gamma distributed random number.
+         * @param a first parameter, forwarded to the concrete generator.
+         * @param b second parameter, forwarded to the concrete generator.
+         */
+        virtual double gamma(double a, double b) = 0;
+    };
+
+    namespace Random_details {
+      inline  unsigned int real_poisson(RandomNumber *rgen, double nmean) {
+          return rgen->poisson(nmean);
+      }
+
+      // Returns a double: gamma deviates are real valued.
+      inline  double real_gamma(RandomNumber *rgen, double a, double b) {
+          return rgen->gamma(a, b);
+      }
+
+      inline  unsigned int real_negative_binomial(RandomNumber *rgen, double p, double n) {
+          return rgen->negative_binomial(p, n);
+      }
+
+      // Scale a unit deviate by the requested deviation.
+      inline double real_gaussian(RandomNumber *rgen, double dev) {
+        return rgen->gaussian()*dev;
+      }
+    }
+
+    /**
+     * A Random number generator that works in multi-threaded environment. 
+     * The base class is provided through a template argument.
+     */
+    /**
+     * A Random number generator that works in multi-threaded environment. 
+     * The base class is provided through a template argument.
+     *
+     * One BaseGenerator is allocated per thread slot; every scalar call is
+     * routed to the generator indexed by smp_get_thread_id().
+     *
+     * The object owns the generator array. It can be moved but not copied
+     * (a member-wise copy would free the same array twice).
+     */
+    template<typename BaseGenerator>
+    class RandomNumberThreaded: public RandomNumber {
+    protected:
+        /**
+         * Build an empty object. A derived class is expected to call
+         * realInit() itself.
+         */
+        RandomNumberThreaded()
+            : gens(0), numGenerators(0) {
+
+        }
+
+    public:
+        typedef BaseGenerator base_type;
+
+        BaseGenerator *gens;   ///< owned array of numGenerators generators
+        int numGenerators;     ///< number of entries in gens
+
+        // Ownership of 'gens' is exclusive: forbid copies, allow moves.
+        RandomNumberThreaded(RandomNumberThreaded const&) = delete;
+        RandomNumberThreaded& operator=(RandomNumberThreaded const&) = delete;
+
+        RandomNumberThreaded(RandomNumberThreaded&& other)
+            : gens(other.gens), numGenerators(other.numGenerators) {
+            other.gens = 0;
+            other.numGenerators = 0;
+        }
+
+        /**
+         * Allocate the per-thread generators and seed each of them with a
+         * value drawn from b. Any previously allocated array is released.
+         * @param b          generator used to produce the seeds.
+         * @param force_max  number of generators; if negative the value
+         *                   returned by smp_get_max_threads() is used.
+         */
+        void realInit(BaseGenerator& b, int force_max) {
+            using boost::format;
+
+            numGenerators = (force_max < 0) ? smp_get_max_threads() : force_max;
+
+            Console::instance().format<LOG_INFO>(
+                        "Initializing %d threaded random number generators", numGenerators
+                );
+
+            // Do not leak the previous array if realInit is called twice.
+            delete[] gens;
+            gens = 0;
+            gens = new BaseGenerator[numGenerators];
+
+            // Not great entropy
+            for (int i = 0; i < numGenerators; i++)
+                gens[i].seed(b.get());
+        }
+
+        /**
+         * Constructor.
+         * @param force_max  an argument to specific the maximum number of threads that will
+         *                   be used. If equal to -1, it will get the current limit from OpenMP.
+         */
+        RandomNumberThreaded(int force_max)
+            : gens(0), numGenerators(0) {
+            BaseGenerator b;
+
+            realInit(b, force_max);
+        }
+
+
+        /**
+         * Return the base generator for the current thread
+         * @return the fundamental generator for the current thread.
+         */
+        BaseGenerator &base() {
+            return gens[smp_get_thread_id()];
+        }
+
+        /**
+         * Destructor.
+         */
+        virtual ~RandomNumberThreaded() {
+            if (gens == 0)
+                return;
+
+            Console::instance().print<LOG_INFO>(
+                        "Cleaning up parallel random number generators"
+            );
+
+            delete[] gens;
+        }
+
+        /**
+         * Reseed all the generators: a temporary generator is seeded with s
+         * and one value is drawn from it for each per-thread generator.
+         */
+        virtual void seed(unsigned long s) {
+            BaseGenerator b;
+            Console::instance().format<LOG_VERBOSE>("THREADED: Changing random number generation seed with %ld", s);
+
+            b.seed(s);
+            for (int i = 0; i < numGenerators; i++)
+                gens[i].seed(b.get());
+        }
+
+        // The scalar interface forwards to the generator of the calling thread.
+        virtual unsigned long get() {
+            return base().get();
+        }
+
+        virtual double uniform() {
+            return base().uniform();
+        }
+
+        virtual unsigned int poisson(double mean) {
+            return base().poisson(mean);
+        }
+
+        virtual double gamma(double a, double b) {
+            return base().gamma(a, b);
+        }
+
+        virtual unsigned int negative_binomial(double p, double n) {
+            return base().negative_binomial(p, n);
+        }
+
+        // Keep the array overloads of the base class visible.
+        using RandomNumber::poisson;
+        using RandomNumber::gamma;
+        using RandomNumber::negative_binomial;
+        using RandomNumber::gaussian;
+        using RandomNumber::uniform;
+
+
+        /**
+         * Save the number of generators ("num_generators") and the state of
+         * each generator in a sub-group "generator_<i>".
+         */
+        virtual void save(H5_CommonFileGroup& g) {
+            using boost::str;
+            using boost::format;
+            boost::multi_array<int, 1> gen_array(boost::extents[1]);
+
+            gen_array[0] = numGenerators;
+            CosmoTool::hdf5_write_array(g, "num_generators", gen_array);
+            for (int i = 0; i < numGenerators; i++) {
+                H5::Group subg = g.createGroup(str(format("generator_%d") % i));
+                gens[i].save(subg);
+            }
+        }
+
+        /**
+         * Restore the generators from a group written by save().
+         * If the stored count differs from numGenerators this is an error,
+         * unless flexible is true: then a warning is printed and only the
+         * first min(stored, current) generators are restored.
+         */
+        virtual void restore(H5_CommonFileGroup& g, bool flexible = false) {
+            using boost::str;
+            using boost::format;
+            boost::multi_array<int, 1> gen_array;
+
+            CosmoTool::hdf5_read_array(g, "num_generators", gen_array);
+            if (gen_array[0] != numGenerators) {
+                std::string s = str(boost::format(
+                          "The current number of threads (%d) is not compatible with file state (%d)")
+                          % numGenerators % gen_array[0]);
+
+                if (!flexible) {
+                  error_helper<ErrorBadState>(s);
+                } else {
+                  Console::instance().print<LOG_WARNING>(s);
+                }
+            }
+
+            int num_to_read = std::min(numGenerators, gen_array[0]);
+            for (int i = 0; i < num_to_read; i++) {
+                H5::Group subg = g.openGroup(str(format("generator_%d") % i));
+                gens[i].restore(subg, flexible);
+            }
+        }
+    };
+
+    /**
+     * A random number generator that works in MPI environment.
+     */
+    /**
+     * A random number generator that works in MPI environment.
+     * Rank 0 draws one seed per MPI task from a master generator and sends
+     * it to the corresponding task, so that every task seeds its threaded
+     * generators differently.
+     */
+    template<typename BaseGenerator>
+    class RandomNumberMPI: public RandomNumberThreaded<BaseGenerator> {
+    public:
+        typedef RandomNumberThreaded<BaseGenerator> BaseClass;
+        MPI_Communication *comm;
+
+        /**
+          * Constructor.
+          * @param comm       an MPI communicator over which the PRNG must work.
+          * @param force_max  sets the maximum number of threads per MPI task that will in parallel.
+          */ 
+        RandomNumberMPI(MPI_Communication *_comm, int force_max)
+            : BaseClass(), comm(_comm) {
+            BaseGenerator b;
+            unsigned long int seedVal = 0;
+
+            if (comm->rank() == 0) {
+                for (int r = 1; r < comm->size(); r++) {
+                    seedVal = b.get();
+                    comm->send(&seedVal, 1, translateMPIType<unsigned long int>(), r, 0);
+                }
+                // Rank 0 needs a seed of its own, exactly as in seed() below.
+                // Without this extra draw it reused the value just sent to
+                // the last rank (identical streams on rank 0 and rank size-1)
+                // and a single-task run was always seeded with 0.
+                seedVal = b.get();
+            } else {
+                comm->recv(&seedVal, 1, translateMPIType<unsigned long int>(), 0, 0);
+            }
+
+            b.seed(seedVal);
+
+            this->realInit(b, force_max);
+        }
+
+        /**
+         * Reseed the generators of all MPI tasks. Only the value of s given
+         * on rank 0 matters: rank 0 seeds a master generator with it, sends
+         * one derived seed to each other rank, and keeps a further one for
+         * itself.
+         * @param s the seed value.
+         */
+        virtual void seed(unsigned long s) {
+            BaseGenerator b;
+            unsigned long int seedVal;
+
+            Console::instance().format<LOG_VERBOSE>("MPI: Changing random number generation seed with %ld", s);
+            b.seed(s);
+            if (comm->rank() == 0) {
+                for (int r = 1; r < comm->size(); r++) {
+                    seedVal = b.get();
+                    comm->send(&seedVal, 1, translateMPIType<unsigned long int>(), r, 0);
+                }
+                seedVal = b.get();
+            } else {
+                comm->recv(&seedVal, 1, translateMPIType<unsigned long int>(), 0, 0);
+            }
+
+            BaseClass::seed(seedVal);
+        }
+    };
+
+
+
+
+#include "gaussian_ratio.tcc"
+
+};
+
+#endif
--- a/libLSS/samplers/core/types_samplers.hpp
+++ b/libLSS/samplers/core/types_samplers.hpp
@ -0,0 +1,94 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/core/types_samplers.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_TYPES_SAMPLERS_HPP
+#define __LIBLSS_TYPES_SAMPLERS_HPP
+
+#include "libLSS/mpi/generic_mpi.hpp"
+#include <CosmoTool/fourier/fft/fftw_calls.hpp>
+#ifdef ARES_MPI_FFTW
+#  include <CosmoTool/fourier/fft/fftw_calls_mpi.hpp>
+#endif
+#include "libLSS/tools/fftw_allocator.hpp"
+#include "libLSS/tools/uninitialized_type.hpp"
+#include "libLSS/mcmc/state_element.hpp"
+#include "libLSS/samplers/core/random_number.hpp"
+#include <boost/multi_array/storage_order.hpp>
+#include "libLSS/tools/memusage.hpp"
+
+namespace LibLSS {
+
+  // ---- Shorthands for the boost multi-array family -----------------------
+
+  /// boost::multi_array using LibLSS::track_allocator as its allocator.
+  template <typename T, size_t N>
+  using multi_array = boost::multi_array<T, N, LibLSS::track_allocator<T>>;
+
+  /// Read-only view on externally owned memory.
+  template <typename T, size_t N>
+  using const_multi_array_ref = boost::const_multi_array_ref<T, N>;
+
+  /// Mutable view on externally owned memory.
+  template <typename T, size_t N>
+  using multi_array_ref = boost::multi_array_ref<T, N>;
+
+  // ---- FFTW call wrappers -------------------------------------------------
+  // MFCalls designates the MPI flavour when ARES_MPI_FFTW is defined, and
+  // the plain one otherwise.
+
+  using FCalls = CosmoTool::FFTW_Calls<double>;
+#ifdef ARES_MPI_FFTW
+  using MPI_FCalls = CosmoTool::FFTW_MPI_Calls<double>;
+  using MFCalls = MPI_FCalls;
+#else
+  using MFCalls = FCalls;
+#endif
+
+  // ---- State elements -----------------------------------------------------
+
+  using SLong = ScalarStateElement<long>;
+  using SDouble = ScalarStateElement<double>;
+  using SBool = ScalarStateElement<bool>;
+
+  using ArrayType =
+      ArrayStateElement<double, 3, FFTW_Allocator<double>, true>;
+  using CArrayType = ArrayStateElement<
+      std::complex<double>, 3, FFTW_Allocator<std::complex<double>>, true>;
+  using IArrayType =
+      ArrayStateElement<int, 3, LibLSS::track_allocator<int>, true>;
+  using ArrayType1d =
+      ArrayStateElement<double, 1, LibLSS::track_allocator<double>>;
+  using IArrayType1d =
+      ArrayStateElement<int, 1, LibLSS::track_allocator<int>>;
+  using RandomGen = RandomStateElement<RandomNumber>;
+  using SelArrayType =
+      ArrayStateElement<double, 3, FFTW_Allocator<double>, true>;
+
+  // ---- Arrays stored inside the state elements above ---------------------
+
+  using FFTW_Complex_Array = CArrayType::ArrayType;
+  using FFTW_Real_Array = ArrayType::ArrayType;
+
+  using FFTW_Complex_Array_ref = CArrayType::RefArrayType;
+  using FFTW_Real_Array_ref = ArrayType::RefArrayType;
+
+  using Uninit_FFTW_Complex_Array = UninitializedArray<
+      FFTW_Complex_Array, FFTW_Allocator<std::complex<double>>>;
+  using Uninit_FFTW_Real_Array =
+      UninitializedArray<FFTW_Real_Array, FFTW_Allocator<double>>;
+
+  namespace init_helpers {
+    // End of the recursion: nothing left to store.
+    template <size_t i, typename Array>
+    void ArrayDimension_adder(Array &dims) {}
+
+    // Store the first remaining value in slot i, then recurse on the
+    // other values starting at slot i+1.
+    template <size_t i, typename Array, typename... Ntype>
+    void ArrayDimension_adder(Array &dims, size_t head, Ntype... tail) {
+      dims[i] = head;
+      ArrayDimension_adder<i + 1>(dims, tail...);
+    }
+
+  } // namespace init_helpers
+
+  /**
+   * Pack the given values into a boost::array<size_t, K>, K being the
+   * number of arguments. The values are stored in the order given.
+   */
+  template <typename... Ntype>
+  inline boost::array<size_t, sizeof...(Ntype)> ArrayDimension(Ntype... Ns) {
+    boost::array<size_t, sizeof...(Ntype)> packed;
+    init_helpers::ArrayDimension_adder<0>(packed, Ns...);
+    return packed;
+  }
+
+} // namespace LibLSS
+
+#endif
--- a/libLSS/samplers/rgen/gsl_miser.hpp
+++ b/libLSS/samplers/rgen/gsl_miser.hpp
@ -0,0 +1,124 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/rgen/gsl_miser.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __GSL_RANDOM_NUMBER_MISER_HPP
+#define __GSL_RANDOM_NUMBER_MISER_HPP
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_monte.h>
+#include <gsl/gsl_monte_miser.h>
+#include <cstring>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/samplers/core/random_number.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+
+namespace LibLSS {
+
+  /**
+   * This is an adaptor class for the MISER integrator in GSL.
+   * It handles the life cycle of the MISER object, and support for a generic
+   * functor for the integrand.
+   */
+  /**
+   * Adaptor for the GSL MISER Monte Carlo integrator. The object owns the
+   * gsl_monte_miser_state: it can be moved but not copied (a member-wise
+   * copy would free the same state twice).
+   */
+  class GSL_Miser {
+  protected:
+    gsl_monte_miser_state *state;  ///< owned MISER workspace (0 once moved from)
+    size_t Nd;                     ///< number of dimensions of the integral
+
+    /// Holder passed through the 'params' pointer of gsl_monte_function.
+    template<typename Functor>
+    struct MiserCall {
+      Functor f;
+      
+      MiserCall(Functor g) : f(g) {}
+    };
+
+    /// C-compatible trampoline: recovers the functor and evaluates it at x.
+    template<typename Functor>
+    static double adaptor_functor(double *x, size_t, void *params)
+    {
+      MiserCall<Functor> *c = (MiserCall<Functor> *) params;
+      
+      return c->f(x);
+    }
+  
+  public:
+    /**
+     * Constructor.
+     * @param dim number of dimensions over which the integration will occur.
+     */
+    GSL_Miser(size_t dim) 
+      : state(0), Nd(dim) {
+      state = gsl_monte_miser_alloc(dim);
+    }
+
+    // Exclusive ownership of 'state': forbid copies, allow moves.
+    GSL_Miser(GSL_Miser const&) = delete;
+    GSL_Miser& operator=(GSL_Miser const&) = delete;
+
+    GSL_Miser(GSL_Miser&& other)
+      : state(other.state), Nd(other.Nd) {
+      other.state = 0;
+    }
+    
+    /**
+     * Destructor.
+     */
+    ~GSL_Miser() {
+      // 'state' is null for a moved-from object.
+      if (state != 0)
+        gsl_monte_miser_free(state);
+    }
+    
+    /**
+     * Integrate the provided integrand over some range, with a maximum number of calls. A bound
+     * on the maximum error is returned.
+     * Here is a use example:
+     *
+     * @code
+     *   // ...
+     *   GSL_RandomNumber rng;
+     *   size_t calls = 10;
+     *   double xl[2] = {0, 0};
+     *   double xu[2] = {1, 2};
+     *   double value, abserr;
+     *
+     *   GSL_Miser miser(2);   // 2-dimensions
+     *   value = miser.integrate(rng, [](double *x) {
+     *     // do something with x[0], x[1]
+     *     return x[0]*x[0] + x[1]*x[1]; // for example sum(x^2)
+     *   }, xl, xu, calls, abserr);
+     *   //...
+     * @endcode
+     *
+     * @param rng Class adapting the GSL random number generator
+     * @param f Functor representing the integrand. It must have one pointer to double and return a double.
+     * @param xl lower bound for integration (N-dimension contiguous C-array)
+     * @param xu upper bound for integration
+     * @param calls maximum number of calls
+     * @param abserr return medium for estimated maximum absolute error
+     * @return the estimate of the integral; an ErrorGSL is raised through
+     *         error_helper if GSL does not report GSL_SUCCESS.
+     *
+     */
+    // Only valid for GSL
+    template<typename Functor,typename A>
+    double integrate(GSL_RandomNumber& rng, Functor f, A& xl, A& xu, size_t calls, double &abserr) {
+      gsl_monte_function mf;
+      MiserCall<Functor> call(f);
+      double result;
+      int err;
+
+      // 'call' lives on the stack for the duration of the GSL call only.
+      mf.f = &adaptor_functor<Functor>;
+      mf.dim = Nd;
+      mf.params = &call;
+      
+      if ((err = gsl_monte_miser_integrate(&mf, &xl[0], &xu[0], Nd, calls, rng.rng, state, &result, &abserr)) != GSL_SUCCESS)
+        error_helper<ErrorGSL>(boost::format("Error while doing monte carlo integration: error code = %d ") % err);
+      return result;
+    }
+    
+    /**
+     * Use a multi-threaded random number generator deriving from a base "Rng".
+     * This is a helper class to unwrap the GSL base class for the random number generation.
+     * The generator attached to the calling thread (rng.base()) is used.
+     * @see integrate(GSL_RandomNumber& rng, Functor f, A& xl, A& xu, size_t calls, double &abserr) 
+     */ 
+    template<typename Rng, typename Functor, typename A>
+    double integrate(RandomNumberThreaded<Rng>& rng, Functor f, A& xl, A& xu, size_t calls, double &abserr) {
+      return integrate(rng.base(), f, xl, xu, calls, abserr);
+    }
+  };
+
+}
+
+#endif
--- a/libLSS/samplers/rgen/gsl_random_number.hpp
+++ b/libLSS/samplers/rgen/gsl_random_number.hpp
@ -0,0 +1,91 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/rgen/gsl_random_number.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __GSL_RANDOM_NUMBER_HPP
+#define __GSL_RANDOM_NUMBER_HPP
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <cstring>
+#include "libLSS/tools/errors.hpp"
+#include "libLSS/samplers/core/random_number.hpp"
+
+namespace LibLSS {
+
+  /**
+   * RandomNumber implementation backed by a GSL generator (gsl_rng_mt19937).
+   * The object owns its gsl_rng; copying it duplicates the generator
+   * together with its current state.
+   */
+  class GSL_RandomNumber: public RandomNumber
+  {
+  public:
+    gsl_rng *rng;  ///< owned GSL generator
+
+    GSL_RandomNumber() :
+        rng(gsl_rng_alloc(gsl_rng_mt19937)) {
+    }
+
+    /**
+     * Copy constructor: clone the generator of other. The implicit
+     * member-wise copy shared the pointer, and both destructors then freed
+     * the same gsl_rng.
+     */
+    GSL_RandomNumber(GSL_RandomNumber const& other) :
+        RandomNumber(other), rng(gsl_rng_clone(other.rng)) {
+    }
+
+    /**
+     * Copy assignment: copy the state of other into our own generator
+     * (both are of the same GSL type, as required by gsl_rng_memcpy).
+     */
+    GSL_RandomNumber& operator=(GSL_RandomNumber const& other) {
+        if (this != &other)
+            gsl_rng_memcpy(rng, other.rng);
+        return *this;
+    }
+
+    ~GSL_RandomNumber() {
+        gsl_rng_free(rng);
+    }
+
+    virtual double uniform() {
+        return gsl_rng_uniform(rng);
+    }
+
+    /// Exponential deviate with unit mean.
+    virtual double unitexp() {
+          return gsl_ran_exponential(rng, 1.);
+    }
+
+    virtual void seed(unsigned long i) {
+        Console::instance().print<LOG_DEBUG>(boost::format("GSL: Changing random number generation seed with %ld") % i);
+        gsl_rng_set(rng, i);
+    }
+
+    virtual unsigned long get() {
+        return gsl_rng_get(rng);
+    }
+
+    // Keep the array overloads of the base class visible.
+    using RandomNumber::poisson;
+    using RandomNumber::gaussian;
+    using RandomNumber::gamma;
+    using RandomNumber::negative_binomial;
+
+    virtual unsigned int poisson(double mean) {
+        return gsl_ran_poisson(rng, mean);
+    }
+
+    virtual unsigned int negative_binomial(double p, double n) {
+        return gsl_ran_negative_binomial(rng, p, n);
+    }
+
+    virtual double gamma(double a, double b) {
+        return gsl_ran_gamma(rng, a, b);
+    }
+
+    /**
+     * Write the raw generator state as a byte array named "state".
+     */
+    virtual void save(H5_CommonFileGroup& g) {
+        boost::multi_array<char, 1> out(boost::extents[gsl_rng_size(rng)]);
+        ::memcpy(out.origin(), gsl_rng_state(rng), gsl_rng_size(rng));
+        CosmoTool::hdf5_write_array(g, "state", out);
+    }
+
+    /**
+     * Read back the byte array "state" written by save(). An ErrorIO is
+     * raised through error_helper if its size does not match the size of
+     * the state of this generator.
+     * @param g        the HDF5 group to read from.
+     * @param flexible unused here; defaults to false like the base class.
+     */
+    virtual void restore(H5_CommonFileGroup& g, bool flexible = false) {
+        size_t sz = gsl_rng_size(rng);
+        boost::multi_array<char, 1> in;
+
+        CosmoTool::hdf5_read_array(g, "state", in);
+
+
+        if (in.shape()[0] != sz) {
+            error_helper<ErrorIO>("Could not read state in GSL_RandomNumber");
+        }
+        memcpy(gsl_rng_state(rng), in.origin(), sz);
+    }
+  };
+
+
+};
+
+#endif
--- a/libLSS/samplers/rgen/slice_sweep.hpp
+++ b/libLSS/samplers/rgen/slice_sweep.hpp
@ -0,0 +1,231 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/samplers/rgen/slice_sweep.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef _LIBLSS_SLICE_SWEEP_HPP
+#define _LIBLSS_SLICE_SWEEP_HPP
+
+#include "libLSS/mpi/generic_mpi.hpp"
+#include <cmath>
+
+// These algorithms are described in https://www.aquila-consortium.org/wiki/index.php/File:Slice_sampling_Neal_97.pdf
+
+
+namespace LibLSS {
+
+  namespace slice_details {
+    // Common wire format of a job: the job code first, then the abscissa,
+    // both broadcast from ROOT. Used on the sending and receiving side.
+    inline void exchange_job(MPI_Communication *comm, int &code, double &x, int ROOT) {
+      comm->broadcast_t(&code, 1, ROOT);
+      comm->broadcast_t(&x, 1, ROOT);
+    }
+
+    // Announce an evaluation at 'a' (job code 1), then evaluate the
+    // log-likelihood locally and return its value.
+    template<typename LogLikelihood>
+    double request(MPI_Communication *comm, LogLikelihood lh, double a, int ROOT) {
+      int code = 1;
+      exchange_job(comm, code, a, ROOT);
+      return lh(a);
+    }
+
+    // Announce the end of the sweep (job code 0) together with the value 'a'.
+    inline void shutdown(MPI_Communication *comm, double a, int ROOT) {
+      int code = 0;
+      exchange_job(comm, code, a, ROOT);
+    }
+
+    // Receive the next job from ROOT: the abscissa is stored in 'a' and the
+    // job code is returned (non-zero: evaluate, zero: stop).
+    inline int grab_job(MPI_Communication *comm, double& a, int ROOT) {
+      int code;
+      exchange_job(comm, code, a, ROOT);
+      return code;
+    }
+  }
+
+  /**
+   * One slice sampling update with stepping-out, the log-likelihood being
+   * evaluated collectively over an MPI communicator.
+   *
+   * The task ROOT drives the algorithm; the other tasks only evaluate lh at
+   * the points broadcast by ROOT until they are told to stop.
+   *
+   * @param comm MPI communicator.
+   * @param rng  generator providing uniform() (only used on ROOT).
+   * @param lh   log-likelihood functor, called on all tasks.
+   * @param a0   current value of the parameter.
+   * @param step width used to place and to extend the bracket.
+   * @param ROOT rank of the driving task.
+   * @return the new value of the parameter (on all tasks).
+   */
+  template<typename Random, typename LogLikelihood>
+  double slice_sweep(MPI_Communication *comm, Random& rng, LogLikelihood lh, double a0, double step, int ROOT = 0)
+  {
+    using slice_details::request;
+
+    Console::instance().print<LOG_DEBUG>("Doing slicesweep EARLY init");
+
+    // Worker side: serve evaluation requests until shutdown.
+    if (comm->rank() != ROOT) {
+      double point;
+      for (;;) {
+        if (!slice_details::grab_job(comm, point, ROOT))
+          break;
+        lh(point);
+      }
+      return point;
+    }
+
+    Console::instance().print<LOG_DEBUG>("Doing slicesweep init");
+
+    // Slice level below the current log-likelihood.
+    double const logp_start = request(comm, lh, a0, ROOT);
+    double const logu = logp_start + std::log(1-rng.uniform());//draw from (0,1], to avoid log(0)
+    Console::instance().c_assert(!std::isnan(logu), "logu must not be a NaN");
+
+    // Bracket of width 'step' randomly positioned around a0.
+    double const offset = rng.uniform();
+    double al = a0 - offset*step;
+    double ar = a0 + (1-offset)*step;
+
+    Console::instance().print<LOG_DEBUG>(boost::format("First loop (logu = %lg)") % logu);
+    // Step out to the left until the left end is below the slice level.
+    while (!(request(comm, lh, al, ROOT) < logu))
+      al -= step;
+
+    Console::instance().print<LOG_DEBUG>("Second loop");
+    // Same thing on the right.
+    while (!(request(comm, lh, ar, ROOT) < logu))
+      ar += step;
+
+    Console::instance().print<LOG_DEBUG>("Last loop");
+    // Draw in the bracket, shrinking it towards a0 after each rejection.
+    for (;;) {
+      double const a1 = rng.uniform() * (ar - al) + al;
+
+      if (request(comm, lh, a1, ROOT) > logu) {
+        slice_details::shutdown(comm, a1, ROOT);
+        return a1;
+      }
+
+      if (a1 > a0)
+        ar = a1;
+      else
+        al = a1;
+    }
+  }
+
+  /**
+   * One slice sampling update with stepping-out (single process version).
+   *
+   * @param rng  generator providing uniform().
+   * @param lh   log-likelihood functor.
+   * @param a0   current value of the parameter.
+   * @param step width used to place and to extend the bracket.
+   * @return the new value of the parameter.
+   */
+  template<typename Random, typename LogLikelihood>
+  double slice_sweep(Random& rng, LogLikelihood lh, double a0, double step)
+  {
+    // Slice level below the current log-likelihood.
+    double const logp_start = lh(a0);
+    double const logu = logp_start + std::log(1-rng.uniform());//draw from (0,1], to avoid log(0)
+    Console::instance().c_assert(!std::isnan(logu), "logu must not be a NaN");
+
+    // Bracket of width 'step' randomly positioned around a0.
+    double const offset = rng.uniform();
+    double al = a0 - offset*step;
+    double ar = a0 + (1-offset)*step;
+
+    // Step out to the left until the left end is below the slice level.
+    while (!(lh(al) < logu))
+      al -= step;
+
+    // Same thing on the right.
+    while (!(lh(ar) < logu))
+      ar += step;
+
+    // Draw in the bracket, shrinking it towards a0 after each rejection.
+    for (;;) {
+      double const a1 = rng.uniform() * (ar - al) + al;
+
+      if (lh(a1) > logu)
+        return a1;
+
+      if (a1 > a0)
+        ar = a1;
+      else
+        al = a1;
+    }
+  }
+
+  /**
+   * One slice sampling update using the "doubling" procedure to find the
+   * bracket, followed by shrinkage with the acceptance test that goes with
+   * doubling (Neal, "Slice sampling", procedures of Fig. 4, 5 and 6).
+   *
+   * The task ROOT drives the algorithm; the other tasks only evaluate lh at
+   * the points broadcast by ROOT until they are told to stop.
+   *
+   * @param comm MPI communicator.
+   * @param rng  generator providing uniform() (only used on ROOT).
+   * @param lh   log-likelihood functor, called on all tasks.
+   * @param a0   current value of the parameter.
+   * @param step initial width of the bracket.
+   * @param ROOT rank of the driving task.
+   * @return the new value of the parameter (on all tasks).
+   */
+  template<typename Random, typename LogLikelihood>
+  double slice_sweep_double(MPI_Communication *comm, Random& rng, LogLikelihood lh, double a0, double step, int ROOT = 0)
+  {
+    ConsoleContext<LOG_DEBUG> ctx("slicesweep_double");
+
+    // Worker side: serve evaluation requests until shutdown.
+    if (comm->rank() != ROOT) {
+      double v;
+      while (slice_details::grab_job(comm, v, ROOT)) {
+        lh(v);
+      }
+      return v;
+    }
+
+    ctx.print("INIT");
+    // Find the initial likelihood and the slice level
+    double logp0 = slice_details::request(comm, lh, a0, ROOT);
+    double logu = logp0 + std::log(1-rng.uniform());//draw from (0,1], to avoid log(0)
+    Console::instance().c_assert(!std::isnan(logu), "logu must not be a NaN");
+
+    // Bracket of width 'step' randomly positioned around a0.
+    double rr = rng.uniform();
+    double al = a0 - rr*step;
+    double ar = a0 + (1-rr)*step;
+    
+    ctx.print(boost::format("Step defining loop (logu = %lg)") % logu);
+    // Doubling: extend the bracket on a random side, doubling its width,
+    // until both ends are below the slice level. logpl/logpr always hold
+    // the log-likelihood at the current al/ar.
+    double logpl = slice_details::request(comm, lh, al, ROOT);
+    double logpr = slice_details::request(comm, lh, ar, ROOT);
+    while (logpl >= logu || logpr >= logu) {
+      double v= rng.uniform();
+      if (v < 0.5) {
+        al -= (ar - al);
+        logpl = slice_details::request(comm, lh, al, ROOT);
+        ctx.print(boost::format("new al=%g, logpl = %g") % al % logpl);
+      } else {
+        ar += (ar - al);
+        logpr = slice_details::request(comm, lh, ar, ROOT);
+        ctx.print(boost::format("new ar=%g, logpr = %g") % ar % logpr);
+      }
+    }
+
+    // The acceptance test must retrace the doubling starting from the
+    // interval found above, NOT from the bracket shrunk by rejections:
+    // keep a copy of it together with the log-likelihood at its ends.
+    double const al_doubling = al;
+    double const ar_doubling = ar;
+    double const logpl_doubling = logpl;
+    double const logpr_doubling = logpr;
+    
+    ctx.print("Sampling loop");
+    while (true) {
+      double a1 = rng.uniform() * (ar - al) + al;
+      double logp1 = slice_details::request(comm, lh, a1, ROOT);
+      
+      if (logp1 > logu) {
+        double ar_hat = ar_doubling;
+        double al_hat = al_doubling;
+        double logpl_hat = logpl_doubling;
+        double logpr_hat = logpr_doubling;
+        bool not_accepted = false;
+        // D records whether, at any level of the halving so far, a0 and a1
+        // were in different halves. Once set it stays set.
+        bool D = false;
+
+        ctx.print(boost::format("Got a candidate at a1=%g") % a1);
+
+        while ((ar_hat - al_hat) > (1.1*step) && !not_accepted) {
+          double am = 0.5 * (ar_hat+al_hat);
+
+          if ((a0 < am && a1 >= am) || (a0 >= am && a1 < am))
+            D = true;
+
+          // Keep the half that contains the candidate.
+          if (a1 < am) {
+            ar_hat = am;
+            logpr_hat = slice_details::request(comm, lh, ar_hat, ROOT);
+          } else {
+            al_hat = am;
+            logpl_hat = slice_details::request(comm, lh, al_hat, ROOT);
+          }
+
+          ctx.print(boost::format("ar_hat=%lg, al_hat=%lg, logpl_hat=%lg, logpr_hat=%lg, D=%d") % ar_hat % al_hat % logpl_hat % logpr_hat % D);
+
+          if (D && logu >= logpl_hat && logu >= logpr_hat) {
+            // Not acceptable. Try again.
+            ctx.print("Not good");
+            not_accepted = true;
+          }
+        }
+
+        // Go back outside
+        if (not_accepted)
+          continue;
+
+        slice_details::shutdown(comm, a1, ROOT);
+        return a1;
+      } else {
+        // Shrink bracket
+        if (a1 > a0) 
+          ar = a1;
+        else
+          al = a1;
+      }
+    }
+  }
+
+}
+
+#endif
--- a/libLSS/tests/CMakeLists.txt
+++ b/libLSS/tests/CMakeLists.txt
@ -0,0 +1,84 @@
+include(${CMAKE_SOURCE_DIR}/cmake_modules/test_macros.cmake)
+
+# Libraries handed to every test executable created further down.
+set(LIBS
+  ${COSMOTOOL_LIB}
+  ${BOOST_LIBRARIES}
+  ${HDF5_CXX_LIBRARIES}
+  ${HEALPIX_LIBRARIES}
+  ${HDF5_LIBRARIES}
+  ${GSL_LIBRARY}
+  ${GSL_CBLAS_LIBRARY}
+  ${FFTW_LIBRARIES}
+  ${ZLIB_LIBRARY}
+  ${DL_LIBRARY}
+  ${EXTRA_LIB}
+)
+
+# When RT_LIBRARY is set, append it, followed by the FFTW libraries once more.
+if(RT_LIBRARY)
+  list(APPEND LIBS ${RT_LIBRARY} ${FFTW_LIBRARIES})
+endif()
+
+# Start from a clean state for the accumulators used by this file.
+unset(TEST_LIBRARY_SRCS)
+unset(TEST_targets)
+
+# Names of the tests of the "base" module; each entry <name> is built from
+# test_<name>.cpp into the executable test_<name>.
+set(TEST_base_LIST
+  console
+  has_member
+  proj
+  messenger
+  messenger2
+  messenger3
+  schechter
+  rgen
+  window3d
+  slice_sweep
+  slice_sweep_double
+  cic
+  mngp
+  uninit
+  fused_array
+  supersampling
+  gradient_supersampling
+  array
+  auto_interpolator
+  gig
+  fuse_wrapper
+  tuple
+  fused_cond
+  cic_adjoint
+  cg
+  cpu_feature
+  r3d
+  hdf5_buffered
+  overload
+  class_interface
+)
+
+macro(ares_add_test_targets)  # Append every given target name to TEST_targets; being a macro, it updates the caller's variable.
+  list(APPEND TEST_targets ${ARGN}) 
+endmacro()
+
+
+# All test targets below are only configured when BUILD_TESTING is enabled.
+if(BUILD_TESTING)
+  add_executable(test_stl_container test_stl_container.cpp)
+
+  # One test_<name> executable per entry of TEST_<module>_LIST, for each entry
+  # of ARES_MODULES followed by the built-in "base" set.
+  foreach(module IN LISTS ARES_MODULES ITEMS base)
+    add_liblss_test_module(${module})
+
+    # The source directory depends only on the module, so it is resolved once
+    # per module. Both operands are quoted so that if() never re-interprets a
+    # module name as the name of a variable.
+    if("${module}" STREQUAL "base")
+      set(_src ${CMAKE_SOURCE_DIR}/libLSS/tests)
+    else()
+      set(_src ${CMAKE_SOURCE_DIR}/extra/${module}/libLSS/tests)
+    endif()
+
+    foreach(test_name IN ITEMS ${TEST_${module}_LIST})
+      add_executable(test_${test_name} ${_src}/test_${test_name}.cpp)
+
+      # Common libraries plus the optional per-test TEST_<name>_LIBS extras.
+      set(test_lib ${LIBS})
+      if(TEST_${test_name}_LIBS)
+        list(APPEND test_lib ${TEST_${test_name}_LIBS})
+      endif()
+
+      # The keyword-less signature is kept: CMake rejects mixing it with the
+      # PRIVATE/PUBLIC form on one target.
+      # NOTE(review): the helper macros from test_macros.cmake may link these
+      # targets as well -- check them before switching to keywords.
+      target_link_libraries(test_${test_name} test_library_LSS LSS ${test_lib})
+      add_dependencies(test_${test_name} ${ares_DEPS})
+      ares_add_test_targets(test_${test_name})
+    endforeach()
+  endforeach()
+
+
+  # Support library linked into every test executable created above.
+  # NOTE(review): TEST_LIBRARY_SOURCES is never assigned in this file (only
+  # TEST_LIBRARY_SRCS is reset near the top); presumably it is provided by
+  # add_liblss_test_module -- confirm.
+  add_library(test_library_LSS dummy_file.cpp testFramework.cpp ${TEST_LIBRARY_SOURCES})
+  add_dependencies(test_library_LSS ${ares_DEPS})
+
+
+  # list_join(<list-var> <separator> <out-var>)
+  # Concatenate the elements of <list-var> into <out-var>, each element being
+  # preceded by <separator> (so the result starts with a separator).
+  macro(list_join listname separator output)
+    set(${output})
+    foreach(X IN LISTS ${listname})
+       set(${output} "${${output}}${separator}${X}")
+    endforeach()
+  endmacro()
+
+  # Meta target depending on every registered test executable.
+  list_join(TEST_targets " " _TEST_targets)
+  add_custom_target(all_tests)
+  cmessage(STATUS "Meta deps : ${_TEST_targets}")
+  add_dependencies(all_tests ${TEST_targets})
+
+
+  add_direct_test(test_cosmo_expansion ${CMAKE_CURRENT_SOURCE_DIR}/test_cosmo_expansion.cpp)
+  add_check_output_test(test_auto_interpolator ${CMAKE_CURRENT_SOURCE_DIR}/test_auto_interpolator.cpp "")
+  add_test_to_run(test_overload test_overload)
+
+  #add_test(NAME adam COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_adam)
+  # Name the executable target instead of a hard-coded path: add_test() then
+  # substitutes the actual location of the built binary.
+  add_test(NAME cg COMMAND test_cg)
+
+endif()
--- a/libLSS/tests/data/gen_reference_data.py
+++ b/libLSS/tests/data/gen_reference_data.py
@ -0,0 +1,17 @@
+#+
+#   ARES/HADES/BORG Package -- ./libLSS/tests/data/gen_reference_data.py
+#   Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+#   Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+#
+#   Additional contributions from:
+#      Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+#   
+#+
+import h5py as h5
+import numpy as np
+
+with h5.File("reference_data.h5", mode="w") as f:
+  for N in [32]:
+    numbers = np.random.normal(size=(N,N,N))
+    f[f'/f_size_{N}'] = numbers
+    f[f'/c_size_{N}'] = np.fft.rfftn(numbers)
--- a/libLSS/tests/data/reference_data.h5
+++ b/libLSS/tests/data/reference_data.h5
--- a/libLSS/tests/dummy_file.cpp
+++ b/libLSS/tests/dummy_file.cpp
@ -0,0 +1,10 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/dummy_file.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+/* empty file just to quiet CMake */
--- a/libLSS/tests/plot_grav.py
+++ b/libLSS/tests/plot_grav.py
@ -0,0 +1,70 @@
+#+
+#   ARES/HADES/BORG Package -- ./libLSS/tests/plot_grav.py
+#   Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+#   Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+#
+#   Additional contributions from:
+#      Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+#   
+#+
+import h5py as h5
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+H100=100.e3
+h=0.68
+L=100.
+N=128
+G=6.67e-11
+omega_m=0.30
+Mpc_in_m=3.08567758e22
+udistance=1.*Mpc_in_m
+
+dmean = 1.0/(float(N)**3)
+
+with h5.File("gravity.h5") as f:
+  g = f['gravity'][...]
+  p = f['position'][...]
+  pot = f['potential'][...]
+  ud = f['unit_density'][0] 
+  up = f['unit_potential'][0]
+
+ud *= Mpc_in_m**3
+
+g = g[:(g.shape[0]/2),:]
+p = p[:(p.shape[0]/2),:]
+pot = pot[:(pot.shape[0]/2)]
+
+ref = np.array([L/2,0,L/2])
+
+plt.clf()
+#plt.plot(-g[:,0])
+plt.plot(p[:,1],-g[:,1])
+
+yy = p[:,1]
+
+aa = 6.67e-11 * ud * (L/N)**3 * yy/yy**3
+
+plt.plot(yy, aa)
+
+plt.gca().set_yscale('log')
+plt.gca().set_xscale('log')
+
+#plt.plot(g[:,2])
+
+plt.gcf().savefig("grav.png")
+
+
+xx=np.arange(N/2)*L/N 
+mass = 3*(H100*h/Mpc_in_m)**2/(8*np.pi*G) * omega_m * (Mpc_in_m)**3
+
+real_pot = 6.67e-11 * mass / (udistance*xx)
+
+plt.clf()
+plt.plot(xx,pot*up)
+plt.plot(xx,real_pot)
+plt.gca().set_yscale('log')
+plt.gca().set_xscale('log')
+plt.gcf().savefig("pot.png")
--- a/libLSS/tests/ref_pm.h5
+++ b/libLSS/tests/ref_pm.h5
--- a/libLSS/tests/testFramework.cpp
+++ b/libLSS/tests/testFramework.cpp
@ -0,0 +1,14 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/testFramework.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <string>
+#include "libLSS/tests/testFramework.hpp" 
+#include "libLSS/cconfig.h" 
+
+// Definition of the reference-data path declared `extern` in testFramework.hpp,
+// initialised from the __LIBLSS_TEST_REFERENCE_PATH macro.
+namespace LibLSS_tests {
+  std::string reference_path(__LIBLSS_TEST_REFERENCE_PATH);
+} // namespace LibLSS_tests
--- a/libLSS/tests/testFramework.hpp
+++ b/libLSS/tests/testFramework.hpp
@ -0,0 +1,47 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/testFramework.hpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#ifndef __LIBLSS_TESTS_TESTFRAMEWORK_HPP
+#define __LIBLSS_TESTS_TESTFRAMEWORK_HPP
+
+#include <H5Cpp.h>
+#include <boost/format.hpp>
+#include <CosmoTool/hdf5_array.hpp>
+
+namespace LibLSS_tests {
+  // Path of the HDF5 file opened by loadReferenceInput (declared here only).
+  extern std::string reference_path;
+
+  namespace {
+    namespace prefix {
+      namespace details {
+        // One overload per supported element type; the argument only selects
+        // the overload. Real types give "f", int gives "i", complex gives "c".
+        std::string prefix_type(float) { return "f"; }
+        std::string prefix_type(double) { return "f"; }
+        std::string prefix_type(int) { return "i"; }
+        std::string prefix_type(std::complex<float>) { return "c"; }
+        std::string prefix_type(std::complex<double>) { return "c"; }
+      } // namespace details
+
+      // Dataset-name prefix associated with the element type T.
+      template <typename T>
+      std::string get() {
+        T const selector = T();
+        return details::prefix_type(selector);
+      }
+    } // namespace prefix
+  }   // namespace
+
+  // Read the dataset "<prefix>_size_<N>" of the reference file into `data`,
+  // where <prefix> is prefix::get<T>().
+  template <typename T>
+  void loadReferenceInput(size_t N, boost::multi_array_ref<T, 3> &data) {
+    H5::H5File reference_file(reference_path, H5F_ACC_RDONLY);
+    std::string const dataset_name =
+        boost::str(boost::format("%s_size_%d") % prefix::get<T>() % N);
+
+    CosmoTool::hdf5_read_array(reference_file, dataset_name, data, false, true);
+  }
+} // namespace LibLSS_tests
+
+#endif
--- a/libLSS/tests/test_array.cpp
+++ b/libLSS/tests/test_array.cpp
@ -0,0 +1,73 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_array.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <boost/config.hpp>
+#ifdef BOOST_NO_CXX11_AUTO_DECLARATIONS
+#error This test needs C++11 features to compile.
+#else
+
+#include <boost/multi_array.hpp>
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/log_traits.hpp"
+#include "libLSS/tools/fused_assign.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include <boost/lambda/lambda.hpp>
+#include <boost/bind/bind.hpp>
+
+using boost::placeholders::_1;
+
+using namespace LibLSS;
+
+
+// Exchange elements i and j of `a`; bound as the swap callback that is passed
+// to array::reorder in main().
+static void aSwapper(boost::multi_array<double, 1> &a, long i, long j)
+{
+    double const previous_i = a[i];
+    a[i] = a[j];
+    a[j] = previous_i;
+}
+
+// Shuffle an array together with an index array, then let array::reorder
+// rearrange it through aSwapper, printing the array before and after.
+int main(int argc, char **argv)
+{
+    setupMPI(argc, argv);
+    LibLSS::StaticInit::execute();
+
+    boost::multi_array<double,1> a(boost::extents[10]);
+    boost::multi_array<long,1> idx(boost::extents[10]);
+
+    // Print a title line followed by one element of `a` per line.
+    auto dump = [&a](char const *title) {
+        std::cout << title << std::endl;
+        for (auto it = a.begin(); it != a.end(); ++it)
+            std::cout << *it << std::endl;
+    };
+
+    // Presumably fills a[i] with 10 - i and idx[i] with 9 - i -- the exact
+    // semantics of b_fused_idx are defined outside this file.
+    copy_array(a, b_fused_idx<double, 1>(10.0-boost::lambda::_1));
+    copy_array(idx, b_fused_idx<long, 1>(9-boost::lambda::_1));
+
+    // Apply 100 random transpositions to both arrays in lockstep.
+    int remaining = 100;
+    while (remaining-- > 0) {
+      int first = drand48()*a.shape()[0];
+      int second = drand48()*a.shape()[0];
+      std::swap(a[first],a[second]);
+      std::swap(idx[first],idx[second]);
+    }
+
+    dump("Before sorting");
+
+    // reorder is driven by idx and performs its swaps on `a` via aSwapper.
+    array::reorder(idx, boost::bind(aSwapper, boost::ref(a), boost::placeholders::_1, boost::placeholders::_2));
+
+    dump("After sorting");
+
+    doneMPI();
+
+    return 0;
+}
+#endif
--- a/libLSS/tests/test_auto_interpolator.cpp
+++ b/libLSS/tests/test_auto_interpolator.cpp
@ -0,0 +1,30 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_auto_interpolator.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/tools/auto_interpolator.hpp"
+#include <boost/lambda/lambda.hpp>
+#include <CosmoTool/algo.hpp>
+
+using namespace LibLSS;
+
+// Print "x interpolated(x) x*x" for x from -2 up to (but excluding) 7; the
+// output of this program is compared against the checked-in .expected file.
+int main()
+{
+  using boost::lambda::_1;
+  // NOTE(review): the arguments look like (function, start, end, step, value
+  // below the range, value above the range) -- confirm in auto_interpolator.hpp.
+  auto interpolated = build_auto_interpolator(CosmoTool::square<double>, 0., 4., 0.1, 0., 16.);
+
+  // x must be advanced by repeated addition of 0.01: the expected output
+  // contains the resulting rounding residue (e.g. 1.64105e-15 instead of 0).
+  double x = -2;
+  while (x < 7) {
+    std::cout << x << " " << interpolated(x) << " " << (x*x) << std::endl;
+    x += 0.01;
+  }
+
+  // Exercise default construction followed by copy assignment.
+  auto_interpolator<double> copied;
+  copied = interpolated;
+
+  return 0;
+}
--- a/libLSS/tests/test_auto_interpolator.cpp.expected
+++ b/libLSS/tests/test_auto_interpolator.cpp.expected
@ -0,0 +1,901 @@
+-2 0 4
+-1.99 0 3.9601
+-1.98 0 3.9204
+-1.97 0 3.8809
+-1.96 0 3.8416
+-1.95 0 3.8025
+-1.94 0 3.7636
+-1.93 0 3.7249
+-1.92 0 3.6864
+-1.91 0 3.6481
+-1.9 0 3.61
+-1.89 0 3.5721
+-1.88 0 3.5344
+-1.87 0 3.4969
+-1.86 0 3.4596
+-1.85 0 3.4225
+-1.84 0 3.3856
+-1.83 0 3.3489
+-1.82 0 3.3124
+-1.81 0 3.2761
+-1.8 0 3.24
+-1.79 0 3.2041
+-1.78 0 3.1684
+-1.77 0 3.1329
+-1.76 0 3.0976
+-1.75 0 3.0625
+-1.74 0 3.0276
+-1.73 0 2.9929
+-1.72 0 2.9584
+-1.71 0 2.9241
+-1.7 0 2.89
+-1.69 0 2.8561
+-1.68 0 2.8224
+-1.67 0 2.7889
+-1.66 0 2.7556
+-1.65 0 2.7225
+-1.64 0 2.6896
+-1.63 0 2.6569
+-1.62 0 2.6244
+-1.61 0 2.5921
+-1.6 0 2.56
+-1.59 0 2.5281
+-1.58 0 2.4964
+-1.57 0 2.4649
+-1.56 0 2.4336
+-1.55 0 2.4025
+-1.54 0 2.3716
+-1.53 0 2.3409
+-1.52 0 2.3104
+-1.51 0 2.2801
+-1.5 0 2.25
+-1.49 0 2.2201
+-1.48 0 2.1904
+-1.47 0 2.1609
+-1.46 0 2.1316
+-1.45 0 2.1025
+-1.44 0 2.0736
+-1.43 0 2.0449
+-1.42 0 2.0164
+-1.41 0 1.9881
+-1.4 0 1.96
+-1.39 0 1.9321
+-1.38 0 1.9044
+-1.37 0 1.8769
+-1.36 0 1.8496
+-1.35 0 1.8225
+-1.34 0 1.7956
+-1.33 0 1.7689
+-1.32 0 1.7424
+-1.31 0 1.7161
+-1.3 0 1.69
+-1.29 0 1.6641
+-1.28 0 1.6384
+-1.27 0 1.6129
+-1.26 0 1.5876
+-1.25 0 1.5625
+-1.24 0 1.5376
+-1.23 0 1.5129
+-1.22 0 1.4884
+-1.21 0 1.4641
+-1.2 0 1.44
+-1.19 0 1.4161
+-1.18 0 1.3924
+-1.17 0 1.3689
+-1.16 0 1.3456
+-1.15 0 1.3225
+-1.14 0 1.2996
+-1.13 0 1.2769
+-1.12 0 1.2544
+-1.11 0 1.2321
+-1.1 0 1.21
+-1.09 0 1.1881
+-1.08 0 1.1664
+-1.07 0 1.1449
+-1.06 0 1.1236
+-1.05 0 1.1025
+-1.04 0 1.0816
+-1.03 0 1.0609
+-1.02 0 1.0404
+-1.01 0 1.0201
+-1 0 1
+-0.99 0 0.9801
+-0.98 0 0.9604
+-0.97 0 0.9409
+-0.96 0 0.9216
+-0.95 0 0.9025
+-0.94 0 0.8836
+-0.93 0 0.8649
+-0.92 0 0.8464
+-0.91 0 0.8281
+-0.9 0 0.81
+-0.89 0 0.7921
+-0.88 0 0.7744
+-0.87 0 0.7569
+-0.86 0 0.7396
+-0.85 0 0.7225
+-0.84 0 0.7056
+-0.83 0 0.6889
+-0.82 0 0.6724
+-0.81 0 0.6561
+-0.8 0 0.64
+-0.79 0 0.6241
+-0.78 0 0.6084
+-0.77 0 0.5929
+-0.76 0 0.5776
+-0.75 0 0.5625
+-0.74 0 0.5476
+-0.73 0 0.5329
+-0.72 0 0.5184
+-0.71 0 0.5041
+-0.7 0 0.49
+-0.69 0 0.4761
+-0.68 0 0.4624
+-0.67 0 0.4489
+-0.66 0 0.4356
+-0.65 0 0.4225
+-0.64 0 0.4096
+-0.63 0 0.3969
+-0.62 0 0.3844
+-0.61 0 0.3721
+-0.6 0 0.36
+-0.59 0 0.3481
+-0.58 0 0.3364
+-0.57 0 0.3249
+-0.56 0 0.3136
+-0.55 0 0.3025
+-0.54 0 0.2916
+-0.53 0 0.2809
+-0.52 0 0.2704
+-0.51 0 0.2601
+-0.5 0 0.25
+-0.49 0 0.2401
+-0.48 0 0.2304
+-0.47 0 0.2209
+-0.46 0 0.2116
+-0.45 0 0.2025
+-0.44 0 0.1936
+-0.43 0 0.1849
+-0.42 0 0.1764
+-0.41 0 0.1681
+-0.4 0 0.16
+-0.39 0 0.1521
+-0.38 0 0.1444
+-0.37 0 0.1369
+-0.36 0 0.1296
+-0.35 0 0.1225
+-0.34 0 0.1156
+-0.33 0 0.1089
+-0.32 0 0.1024
+-0.31 0 0.0961
+-0.3 0 0.09
+-0.29 0 0.0841
+-0.28 0 0.0784
+-0.27 0 0.0729
+-0.26 0 0.0676
+-0.25 0 0.0625
+-0.24 0 0.0576
+-0.23 0 0.0529
+-0.22 0 0.0484
+-0.21 0 0.0441
+-0.2 0 0.04
+-0.19 0 0.0361
+-0.18 0 0.0324
+-0.17 0 0.0289
+-0.16 0 0.0256
+-0.15 0 0.0225
+-0.14 0 0.0196
+-0.13 0 0.0169
+-0.12 0 0.0144
+-0.11 0 0.0121
+-0.1 0 0.01
+-0.09 0 0.0081
+-0.08 0 0.0064
+-0.07 0 0.0049
+-0.06 0 0.0036
+-0.05 0 0.0025
+-0.04 0 0.0016
+-0.03 0 0.0009
+-0.02 0 0.0004
+-0.01 0 0.0001
+1.64105e-15 1.64105e-16 2.69304e-30
+0.01 0.001 0.0001
+0.02 0.002 0.0004
+0.03 0.003 0.0009
+0.04 0.004 0.0016
+0.05 0.005 0.0025
+0.06 0.006 0.0036
+0.07 0.007 0.0049
+0.08 0.008 0.0064
+0.09 0.009 0.0081
+0.1 0.01 0.01
+0.11 0.013 0.0121
+0.12 0.016 0.0144
+0.13 0.019 0.0169
+0.14 0.022 0.0196
+0.15 0.025 0.0225
+0.16 0.028 0.0256
+0.17 0.031 0.0289
+0.18 0.034 0.0324
+0.19 0.037 0.0361
+0.2 0.04 0.04
+0.21 0.045 0.0441
+0.22 0.05 0.0484
+0.23 0.055 0.0529
+0.24 0.06 0.0576
+0.25 0.065 0.0625
+0.26 0.07 0.0676
+0.27 0.075 0.0729
+0.28 0.08 0.0784
+0.29 0.085 0.0841
+0.3 0.09 0.09
+0.31 0.097 0.0961
+0.32 0.104 0.1024
+0.33 0.111 0.1089
+0.34 0.118 0.1156
+0.35 0.125 0.1225
+0.36 0.132 0.1296
+0.37 0.139 0.1369
+0.38 0.146 0.1444
+0.39 0.153 0.1521
+0.4 0.16 0.16
+0.41 0.169 0.1681
+0.42 0.178 0.1764
+0.43 0.187 0.1849
+0.44 0.196 0.1936
+0.45 0.205 0.2025
+0.46 0.214 0.2116
+0.47 0.223 0.2209
+0.48 0.232 0.2304
+0.49 0.241 0.2401
+0.5 0.25 0.25
+0.51 0.261 0.2601
+0.52 0.272 0.2704
+0.53 0.283 0.2809
+0.54 0.294 0.2916
+0.55 0.305 0.3025
+0.56 0.316 0.3136
+0.57 0.327 0.3249
+0.58 0.338 0.3364
+0.59 0.349 0.3481
+0.6 0.36 0.36
+0.61 0.373 0.3721
+0.62 0.386 0.3844
+0.63 0.399 0.3969
+0.64 0.412 0.4096
+0.65 0.425 0.4225
+0.66 0.438 0.4356
+0.67 0.451 0.4489
+0.68 0.464 0.4624
+0.69 0.477 0.4761
+0.7 0.49 0.49
+0.71 0.505 0.5041
+0.72 0.52 0.5184
+0.73 0.535 0.5329
+0.74 0.55 0.5476
+0.75 0.565 0.5625
+0.76 0.58 0.5776
+0.77 0.595 0.5929
+0.78 0.61 0.6084
+0.79 0.625 0.6241
+0.8 0.64 0.64
+0.81 0.657 0.6561
+0.82 0.674 0.6724
+0.83 0.691 0.6889
+0.84 0.708 0.7056
+0.85 0.725 0.7225
+0.86 0.742 0.7396
+0.87 0.759 0.7569
+0.88 0.776 0.7744
+0.89 0.793 0.7921
+0.9 0.81 0.81
+0.91 0.829 0.8281
+0.92 0.848 0.8464
+0.93 0.867 0.8649
+0.94 0.886 0.8836
+0.95 0.905 0.9025
+0.96 0.924 0.9216
+0.97 0.943 0.9409
+0.98 0.962 0.9604
+0.99 0.981 0.9801
+1 1 1
+1.01 1.021 1.0201
+1.02 1.042 1.0404
+1.03 1.063 1.0609
+1.04 1.084 1.0816
+1.05 1.105 1.1025
+1.06 1.126 1.1236
+1.07 1.147 1.1449
+1.08 1.168 1.1664
+1.09 1.189 1.1881
+1.1 1.21 1.21
+1.11 1.233 1.2321
+1.12 1.256 1.2544
+1.13 1.279 1.2769
+1.14 1.302 1.2996
+1.15 1.325 1.3225
+1.16 1.348 1.3456
+1.17 1.371 1.3689
+1.18 1.394 1.3924
+1.19 1.417 1.4161
+1.2 1.44 1.44
+1.21 1.465 1.4641
+1.22 1.49 1.4884
+1.23 1.515 1.5129
+1.24 1.54 1.5376
+1.25 1.565 1.5625
+1.26 1.59 1.5876
+1.27 1.615 1.6129
+1.28 1.64 1.6384
+1.29 1.665 1.6641
+1.3 1.69 1.69
+1.31 1.717 1.7161
+1.32 1.744 1.7424
+1.33 1.771 1.7689
+1.34 1.798 1.7956
+1.35 1.825 1.8225
+1.36 1.852 1.8496
+1.37 1.879 1.8769
+1.38 1.906 1.9044
+1.39 1.933 1.9321
+1.4 1.96 1.96
+1.41 1.989 1.9881
+1.42 2.018 2.0164
+1.43 2.047 2.0449
+1.44 2.076 2.0736
+1.45 2.105 2.1025
+1.46 2.134 2.1316
+1.47 2.163 2.1609
+1.48 2.192 2.1904
+1.49 2.221 2.2201
+1.5 2.25 2.25
+1.51 2.281 2.2801
+1.52 2.312 2.3104
+1.53 2.343 2.3409
+1.54 2.374 2.3716
+1.55 2.405 2.4025
+1.56 2.436 2.4336
+1.57 2.467 2.4649
+1.58 2.498 2.4964
+1.59 2.529 2.5281
+1.6 2.56 2.56
+1.61 2.593 2.5921
+1.62 2.626 2.6244
+1.63 2.659 2.6569
+1.64 2.692 2.6896
+1.65 2.725 2.7225
+1.66 2.758 2.7556
+1.67 2.791 2.7889
+1.68 2.824 2.8224
+1.69 2.857 2.8561
+1.7 2.89 2.89
+1.71 2.925 2.9241
+1.72 2.96 2.9584
+1.73 2.995 2.9929
+1.74 3.03 3.0276
+1.75 3.065 3.0625
+1.76 3.1 3.0976
+1.77 3.135 3.1329
+1.78 3.17 3.1684
+1.79 3.205 3.2041
+1.8 3.24 3.24
+1.81 3.277 3.2761
+1.82 3.314 3.3124
+1.83 3.351 3.3489
+1.84 3.388 3.3856
+1.85 3.425 3.4225
+1.86 3.462 3.4596
+1.87 3.499 3.4969
+1.88 3.536 3.5344
+1.89 3.573 3.5721
+1.9 3.61 3.61
+1.91 3.649 3.6481
+1.92 3.688 3.6864
+1.93 3.727 3.7249
+1.94 3.766 3.7636
+1.95 3.805 3.8025
+1.96 3.844 3.8416
+1.97 3.883 3.8809
+1.98 3.922 3.9204
+1.99 3.961 3.9601
+2 4 4
+2.01 4.041 4.0401
+2.02 4.082 4.0804
+2.03 4.123 4.1209
+2.04 4.164 4.1616
+2.05 4.205 4.2025
+2.06 4.246 4.2436
+2.07 4.287 4.2849
+2.08 4.328 4.3264
+2.09 4.369 4.3681
+2.1 4.41 4.41
+2.11 4.453 4.4521
+2.12 4.496 4.4944
+2.13 4.539 4.5369
+2.14 4.582 4.5796
+2.15 4.625 4.6225
+2.16 4.668 4.6656
+2.17 4.711 4.7089
+2.18 4.754 4.7524
+2.19 4.797 4.7961
+2.2 4.84 4.84
+2.21 4.885 4.8841
+2.22 4.93 4.9284
+2.23 4.975 4.9729
+2.24 5.02 5.0176
+2.25 5.065 5.0625
+2.26 5.11 5.1076
+2.27 5.155 5.1529
+2.28 5.2 5.1984
+2.29 5.245 5.2441
+2.3 5.29 5.29
+2.31 5.337 5.3361
+2.32 5.384 5.3824
+2.33 5.431 5.4289
+2.34 5.478 5.4756
+2.35 5.525 5.5225
+2.36 5.572 5.5696
+2.37 5.619 5.6169
+2.38 5.666 5.6644
+2.39 5.713 5.7121
+2.4 5.76 5.76
+2.41 5.809 5.8081
+2.42 5.858 5.8564
+2.43 5.907 5.9049
+2.44 5.956 5.9536
+2.45 6.005 6.0025
+2.46 6.054 6.0516
+2.47 6.103 6.1009
+2.48 6.152 6.1504
+2.49 6.201 6.2001
+2.5 6.25 6.25
+2.51 6.301 6.3001
+2.52 6.352 6.3504
+2.53 6.403 6.4009
+2.54 6.454 6.4516
+2.55 6.505 6.5025
+2.56 6.556 6.5536
+2.57 6.607 6.6049
+2.58 6.658 6.6564
+2.59 6.709 6.7081
+2.6 6.76 6.76
+2.61 6.813 6.8121
+2.62 6.866 6.8644
+2.63 6.919 6.9169
+2.64 6.972 6.9696
+2.65 7.025 7.0225
+2.66 7.078 7.0756
+2.67 7.131 7.1289
+2.68 7.184 7.1824
+2.69 7.237 7.2361
+2.7 7.29 7.29
+2.71 7.345 7.3441
+2.72 7.4 7.3984
+2.73 7.455 7.4529
+2.74 7.51 7.5076
+2.75 7.565 7.5625
+2.76 7.62 7.6176
+2.77 7.675 7.6729
+2.78 7.73 7.7284
+2.79 7.785 7.7841
+2.8 7.84 7.84
+2.81 7.897 7.8961
+2.82 7.954 7.9524
+2.83 8.011 8.0089
+2.84 8.068 8.0656
+2.85 8.125 8.1225
+2.86 8.182 8.1796
+2.87 8.239 8.2369
+2.88 8.296 8.2944
+2.89 8.353 8.3521
+2.9 8.41 8.41
+2.91 8.469 8.4681
+2.92 8.528 8.5264
+2.93 8.587 8.5849
+2.94 8.646 8.6436
+2.95 8.705 8.7025
+2.96 8.764 8.7616
+2.97 8.823 8.8209
+2.98 8.882 8.8804
+2.99 8.941 8.9401
+3 9 9
+3.01 9.061 9.0601
+3.02 9.122 9.1204
+3.03 9.183 9.1809
+3.04 9.244 9.2416
+3.05 9.305 9.3025
+3.06 9.366 9.3636
+3.07 9.427 9.4249
+3.08 9.488 9.4864
+3.09 9.549 9.5481
+3.1 9.61 9.61
+3.11 9.673 9.6721
+3.12 9.736 9.7344
+3.13 9.799 9.7969
+3.14 9.862 9.8596
+3.15 9.925 9.9225
+3.16 9.988 9.9856
+3.17 10.051 10.0489
+3.18 10.114 10.1124
+3.19 10.177 10.1761
+3.2 10.24 10.24
+3.21 10.305 10.3041
+3.22 10.37 10.3684
+3.23 10.435 10.4329
+3.24 10.5 10.4976
+3.25 10.565 10.5625
+3.26 10.63 10.6276
+3.27 10.695 10.6929
+3.28 10.76 10.7584
+3.29 10.825 10.8241
+3.3 10.89 10.89
+3.31 10.957 10.9561
+3.32 11.024 11.0224
+3.33 11.091 11.0889
+3.34 11.158 11.1556
+3.35 11.225 11.2225
+3.36 11.292 11.2896
+3.37 11.359 11.3569
+3.38 11.426 11.4244
+3.39 11.493 11.4921
+3.4 11.56 11.56
+3.41 11.629 11.6281
+3.42 11.698 11.6964
+3.43 11.767 11.7649
+3.44 11.836 11.8336
+3.45 11.905 11.9025
+3.46 11.974 11.9716
+3.47 12.043 12.0409
+3.48 12.112 12.1104
+3.49 12.181 12.1801
+3.5 12.25 12.25
+3.51 12.321 12.3201
+3.52 12.392 12.3904
+3.53 12.463 12.4609
+3.54 12.534 12.5316
+3.55 12.605 12.6025
+3.56 12.676 12.6736
+3.57 12.747 12.7449
+3.58 12.818 12.8164
+3.59 12.889 12.8881
+3.6 12.96 12.96
+3.61 13.033 13.0321
+3.62 13.106 13.1044
+3.63 13.179 13.1769
+3.64 13.252 13.2496
+3.65 13.325 13.3225
+3.66 13.398 13.3956
+3.67 13.471 13.4689
+3.68 13.544 13.5424
+3.69 13.617 13.6161
+3.7 13.69 13.69
+3.71 13.765 13.7641
+3.72 13.84 13.8384
+3.73 13.915 13.9129
+3.74 13.99 13.9876
+3.75 14.065 14.0625
+3.76 14.14 14.1376
+3.77 14.215 14.2129
+3.78 14.29 14.2884
+3.79 14.365 14.3641
+3.8 14.44 14.44
+3.81 14.517 14.5161
+3.82 14.594 14.5924
+3.83 14.671 14.6689
+3.84 14.748 14.7456
+3.85 14.825 14.8225
+3.86 14.902 14.8996
+3.87 14.979 14.9769
+3.88 15.056 15.0544
+3.89 15.133 15.1321
+3.9 15.21 15.21
+3.91 16 15.2881
+3.92 16 15.3664
+3.93 16 15.4449
+3.94 16 15.5236
+3.95 16 15.6025
+3.96 16 15.6816
+3.97 16 15.7609
+3.98 16 15.8404
+3.99 16 15.9201
+4 16 16
+4.01 16 16.0801
+4.02 16 16.1604
+4.03 16 16.2409
+4.04 16 16.3216
+4.05 16 16.4025
+4.06 16 16.4836
+4.07 16 16.5649
+4.08 16 16.6464
+4.09 16 16.7281
+4.1 16 16.81
+4.11 16 16.8921
+4.12 16 16.9744
+4.13 16 17.0569
+4.14 16 17.1396
+4.15 16 17.2225
+4.16 16 17.3056
+4.17 16 17.3889
+4.18 16 17.4724
+4.19 16 17.5561
+4.2 16 17.64
+4.21 16 17.7241
+4.22 16 17.8084
+4.23 16 17.8929
+4.24 16 17.9776
+4.25 16 18.0625
+4.26 16 18.1476
+4.27 16 18.2329
+4.28 16 18.3184
+4.29 16 18.4041
+4.3 16 18.49
+4.31 16 18.5761
+4.32 16 18.6624
+4.33 16 18.7489
+4.34 16 18.8356
+4.35 16 18.9225
+4.36 16 19.0096
+4.37 16 19.0969
+4.38 16 19.1844
+4.39 16 19.2721
+4.4 16 19.36
+4.41 16 19.4481
+4.42 16 19.5364
+4.43 16 19.6249
+4.44 16 19.7136
+4.45 16 19.8025
+4.46 16 19.8916
+4.47 16 19.9809
+4.48 16 20.0704
+4.49 16 20.1601
+4.5 16 20.25
+4.51 16 20.3401
+4.52 16 20.4304
+4.53 16 20.5209
+4.54 16 20.6116
+4.55 16 20.7025
+4.56 16 20.7936
+4.57 16 20.8849
+4.58 16 20.9764
+4.59 16 21.0681
+4.6 16 21.16
+4.61 16 21.2521
+4.62 16 21.3444
+4.63 16 21.4369
+4.64 16 21.5296
+4.65 16 21.6225
+4.66 16 21.7156
+4.67 16 21.8089
+4.68 16 21.9024
+4.69 16 21.9961
+4.7 16 22.09
+4.71 16 22.1841
+4.72 16 22.2784
+4.73 16 22.3729
+4.74 16 22.4676
+4.75 16 22.5625
+4.76 16 22.6576
+4.77 16 22.7529
+4.78 16 22.8484
+4.79 16 22.9441
+4.8 16 23.04
+4.81 16 23.1361
+4.82 16 23.2324
+4.83 16 23.3289
+4.84 16 23.4256
+4.85 16 23.5225
+4.86 16 23.6196
+4.87 16 23.7169
+4.88 16 23.8144
+4.89 16 23.9121
+4.9 16 24.01
+4.91 16 24.1081
+4.92 16 24.2064
+4.93 16 24.3049
+4.94 16 24.4036
+4.95 16 24.5025
+4.96 16 24.6016
+4.97 16 24.7009
+4.98 16 24.8004
+4.99 16 24.9001
+5 16 25
+5.01 16 25.1001
+5.02 16 25.2004
+5.03 16 25.3009
+5.04 16 25.4016
+5.05 16 25.5025
+5.06 16 25.6036
+5.07 16 25.7049
+5.08 16 25.8064
+5.09 16 25.9081
+5.1 16 26.01
+5.11 16 26.1121
+5.12 16 26.2144
+5.13 16 26.3169
+5.14 16 26.4196
+5.15 16 26.5225
+5.16 16 26.6256
+5.17 16 26.7289
+5.18 16 26.8324
+5.19 16 26.9361
+5.2 16 27.04
+5.21 16 27.1441
+5.22 16 27.2484
+5.23 16 27.3529
+5.24 16 27.4576
+5.25 16 27.5625
+5.26 16 27.6676
+5.27 16 27.7729
+5.28 16 27.8784
+5.29 16 27.9841
+5.3 16 28.09
+5.31 16 28.1961
+5.32 16 28.3024
+5.33 16 28.4089
+5.34 16 28.5156
+5.35 16 28.6225
+5.36 16 28.7296
+5.37 16 28.8369
+5.38 16 28.9444
+5.39 16 29.0521
+5.4 16 29.16
+5.41 16 29.2681
+5.42 16 29.3764
+5.43 16 29.4849
+5.44 16 29.5936
+5.45 16 29.7025
+5.46 16 29.8116
+5.47 16 29.9209
+5.48 16 30.0304
+5.49 16 30.1401
+5.5 16 30.25
+5.51 16 30.3601
+5.52 16 30.4704
+5.53 16 30.5809
+5.54 16 30.6916
+5.55 16 30.8025
+5.56 16 30.9136
+5.57 16 31.0249
+5.58 16 31.1364
+5.59 16 31.2481
+5.6 16 31.36
+5.61 16 31.4721
+5.62 16 31.5844
+5.63 16 31.6969
+5.64 16 31.8096
+5.65 16 31.9225
+5.66 16 32.0356
+5.67 16 32.1489
+5.68 16 32.2624
+5.69 16 32.3761
+5.7 16 32.49
+5.71 16 32.6041
+5.72 16 32.7184
+5.73 16 32.8329
+5.74 16 32.9476
+5.75 16 33.0625
+5.76 16 33.1776
+5.77 16 33.2929
+5.78 16 33.4084
+5.79 16 33.5241
+5.8 16 33.64
+5.81 16 33.7561
+5.82 16 33.8724
+5.83 16 33.9889
+5.84 16 34.1056
+5.85 16 34.2225
+5.86 16 34.3396
+5.87 16 34.4569
+5.88 16 34.5744
+5.89 16 34.6921
+5.9 16 34.81
+5.91 16 34.9281
+5.92 16 35.0464
+5.93 16 35.1649
+5.94 16 35.2836
+5.95 16 35.4025
+5.96 16 35.5216
+5.97 16 35.6409
+5.98 16 35.7604
+5.99 16 35.8801
+6 16 36
+6.01 16 36.1201
+6.02 16 36.2404
+6.03 16 36.3609
+6.04 16 36.4816
+6.05 16 36.6025
+6.06 16 36.7236
+6.07 16 36.8449
+6.08 16 36.9664
+6.09 16 37.0881
+6.1 16 37.21
+6.11 16 37.3321
+6.12 16 37.4544
+6.13 16 37.5769
+6.14 16 37.6996
+6.15 16 37.8225
+6.16 16 37.9456
+6.17 16 38.0689
+6.18 16 38.1924
+6.19 16 38.3161
+6.2 16 38.44
+6.21 16 38.5641
+6.22 16 38.6884
+6.23 16 38.8129
+6.24 16 38.9376
+6.25 16 39.0625
+6.26 16 39.1876
+6.27 16 39.3129
+6.28 16 39.4384
+6.29 16 39.5641
+6.3 16 39.69
+6.31 16 39.8161
+6.32 16 39.9424
+6.33 16 40.0689
+6.34 16 40.1956
+6.35 16 40.3225
+6.36 16 40.4496
+6.37 16 40.5769
+6.38 16 40.7044
+6.39 16 40.8321
+6.4 16 40.96
+6.41 16 41.0881
+6.42 16 41.2164
+6.43 16 41.3449
+6.44 16 41.4736
+6.45 16 41.6025
+6.46 16 41.7316
+6.47 16 41.8609
+6.48 16 41.9904
+6.49 16 42.1201
+6.5 16 42.25
+6.51 16 42.3801
+6.52 16 42.5104
+6.53 16 42.6409
+6.54 16 42.7716
+6.55 16 42.9025
+6.56 16 43.0336
+6.57 16 43.1649
+6.58 16 43.2964
+6.59 16 43.4281
+6.6 16 43.56
+6.61 16 43.6921
+6.62 16 43.8244
+6.63 16 43.9569
+6.64 16 44.0896
+6.65 16 44.2225
+6.66 16 44.3556
+6.67 16 44.4889
+6.68 16 44.6224
+6.69 16 44.7561
+6.7 16 44.89
+6.71 16 45.0241
+6.72 16 45.1584
+6.73 16 45.2929
+6.74 16 45.4276
+6.75 16 45.5625
+6.76 16 45.6976
+6.77 16 45.8329
+6.78 16 45.9684
+6.79 16 46.1041
+6.8 16 46.24
+6.81 16 46.3761
+6.82 16 46.5124
+6.83 16 46.6489
+6.84 16 46.7856
+6.85 16 46.9225
+6.86 16 47.0596
+6.87 16 47.1969
+6.88 16 47.3344
+6.89 16 47.4721
+6.9 16 47.61
+6.91 16 47.7481
+6.92 16 47.8864
+6.93 16 48.0249
+6.94 16 48.1636
+6.95 16 48.3025
+6.96 16 48.4416
+6.97 16 48.5809
+6.98 16 48.7204
+6.99 16 48.8601
+7 16 49
--- a/libLSS/tests/test_cg.cpp
+++ b/libLSS/tests/test_cg.cpp
@ -0,0 +1,90 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_cg.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <boost/multi_array.hpp>
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/optimization/cg.hpp"
+#include <CosmoTool/algo.hpp>
+#include <CosmoTool/hdf5_array.hpp>
+#include <boost/preprocessor/cat.hpp>
+#include <boost/preprocessor/stringize.hpp>
+#include <boost/preprocessor/seq/for_each.hpp>
+#include <algorithm>
+#include "libLSS/tools/optimization/array_helper.hpp"
+
+using namespace LibLSS;
+using boost::extents;
+using namespace CosmoTool;
+using namespace std;
+
+typedef Optimization::BoostArrayAllocator<double, 1> allocator_t;
+typedef allocator_t::array_t Array;
+
+// Apply the dense test operator M to `in` and store the product in `out`,
+// with M(i, j) = 0.5 * exp(-0.5 * (i - j)^2).
+void A(Array &out, Array const &in) {
+  int const size = in.shape()[0];
+
+  for (int row = 0; row < size; ++row) {
+    // The sum is accumulated directly in out[row], column by column.
+    out[row] = 0;
+    for (int col = 0; col < size; ++col) {
+      double const offset = row - col;
+      double const weight = 0.5 * exp(-0.5 * offset * offset);
+      out[row] += weight * in[col];
+    }
+  }
+}
+
+// Solve A x = b with the conjugate-gradient solver, where b = A x0 for a known
+// x0, and compare the solution with x0.
+// Returns 0 when the summed squared error is below 1e-5 and 1 otherwise, so
+// that the registered CTest test reflects the printed verdict.
+int main(int argc, char **argv) {
+  setupMPI(argc, argv);
+  // Touch the console singleton before the static initializers run.
+  LibLSS::Console::instance();
+  LibLSS::StaticInit::execute();
+
+  allocator_t allocator;
+  CG<allocator_t> cg(allocator);
+
+  int N = 2000;
+  boost::multi_array<double, 1> b(boost::extents[N]);
+  boost::multi_array<double, 1> x0(boost::extents[N]);
+  boost::multi_array<double, 1> x(boost::extents[N]);
+
+  fwrap(b) = 1;
+  fwrap(x) = 0;
+
+  // Known solution: x0[i] = i.
+  for (int i = 0; i < N; i++)
+    x0[i] = i;
+
+  // Right-hand side consistent with the known solution.
+  A(b, x0);
+
+  cg.run(A, b, x);
+
+  double max_diff = 0;
+  int imax = 0;
+  double eps = 0.;
+  for (int i = 0; i < N; i++) {
+    double diff = fabs(x[i] - x0[i]);
+    // Track the largest deviation together with the index where it occurs.
+    if (max_diff < diff) {
+      max_diff = diff;
+      imax = i;
+    }
+
+    eps += diff * diff;
+  }
+
+  // A NaN error also counts as a failure because the comparison is false.
+  bool const passed = eps < 1e-5;
+
+  if (passed)
+    std::cout << std::endl << "CG matrix inversion test passed!" << std::endl;
+  else
+    std::cout << "CG matrix inversion test failed!" << std::endl << std::endl;
+
+  std::cout << "Distance between truth and solution  = " << eps << std::endl;
+  std::cout << "Largest deviation  = " << max_diff << " at element imax =" << imax
+            << std::endl;
+
+  LibLSS::StaticInit::finalize();
+
+  doneMPI();
+  return passed ? 0 : 1;
+}
--- a/libLSS/tests/test_cic.cpp
+++ b/libLSS/tests/test_cic.cpp
@ -0,0 +1,128 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_cic.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include <boost/multi_array.hpp>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/uninitialized_type.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/physics/classic_cic.hpp"
+#include "libLSS/physics/openmp_cic.hpp"
+#include "libLSS/physics/cosmo.hpp"
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/hdf5_error.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include <boost/chrono.hpp>
+
+#undef RANDOM_ACCESS
+//#define RANDOM_ACCESS
+
+using namespace LibLSS;
+using CosmoTool::cube;
+
+typedef ClassicCloudInCell<double> CIC;
+#ifdef _OPENMP
+typedef OpenMPCloudInCell<double> CIC_MP;
+#endif
+
+// Project a set of particles onto an N^3 grid with the classic cloud-in-cell
+// kernel and, when built with OpenMP, with the OpenMP variant; print both
+// timings and write the two density fields to cic.h5.
+// Returns 0 on success and 1 when cic.h5 cannot be written.
+int main(int argc, char **argv) {
+  MPI_Communication *world = setupMPI(argc, argv);
+  StaticInit::execute();
+  CosmologicalParameters cosmo;
+  cosmo.omega_m = 0.30;
+  cosmo.omega_b = 0.045;
+  cosmo.omega_q = 0.70;
+  cosmo.w = -1;
+  cosmo.n_s = 0.97;
+  cosmo.sigma8 = 0.8;
+  cosmo.h = 0.68;
+  cosmo.a0 = 1.0;
+
+  Console::instance().setVerboseLevel<LOG_DEBUG>();
+
+  double L = 1.0;
+  int N = 64;      // grid cells per dimension
+  int Np_g = 128;  // particles per dimension
+  int Np = cube(Np_g);
+  typedef UninitializedArray<boost::multi_array<double, 3>> U_Density;
+  typedef UninitializedArray<boost::multi_array<double, 2>> U_Particles;
+  U_Density density_p(boost::extents[N][N][N]);
+  U_Density density_mp_p(boost::extents[N][N][N]);
+  U_Particles particles_p(boost::extents[Np][3]);
+  U_Density::array_type &density = density_p.get_array();
+  U_Density::array_type &density_mp = density_mp_p.get_array();
+  U_Particles::array_type &particles = particles_p.get_array();
+  CIC cic;
+#ifdef _OPENMP
+  CIC_MP cic_mp;
+#endif
+
+#ifdef RANDOM_ACCESS
+  // Uniformly random positions inside the box.
+  RandomNumberThreaded<GSL_RandomNumber> rgen(-1);
+
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    particles[i][0] = L * rgen.uniform();
+    particles[i][1] = L * rgen.uniform();
+    particles[i][2] = L * rgen.uniform();
+  }
+#else
+
+  // Regular lattice: particle i sits at (ix, iy, iz) * L / Np_g.
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    int iz = (i % Np_g);
+    int iy = ((i / Np_g) % Np_g);
+    int ix = ((i / Np_g / Np_g));
+    particles[i][0] = L / Np_g * ix;
+    particles[i][1] = L / Np_g * iy;
+    particles[i][2] = L / Np_g * iz;
+  }
+
+#endif
+
+  Console::instance().print<LOG_INFO>("Clearing and projecting");
+  array::fill(density, 0);
+  array::fill(density_mp, 0);
+
+  using namespace boost::chrono;
+  system_clock::time_point start_classic, end_classic, start_mp, end_mp;
+
+  start_classic = system_clock::now();
+  cic.projection(particles, density, L, L, L, N, N, N);
+  end_classic = system_clock::now();
+
+  // Without OpenMP nothing is projected here: density_mp stays zero and the
+  // reported OpenMP time only covers the two clock reads.
+  start_mp = system_clock::now();
+#ifdef _OPENMP
+  cic_mp.projection(particles, density_mp, L, L, L, N, N, N);
+#endif
+  end_mp = system_clock::now();
+
+  duration<double> elapsed_classic = end_classic - start_classic;
+  duration<double> elapsed_mp = end_mp - start_mp;
+
+  std::cout << "OpenMP: " << elapsed_mp << "  Classic: " << elapsed_classic
+            << std::endl;
+
+  try {
+    H5::H5File f("cic.h5", H5F_ACC_TRUNC);
+    CosmoTool::hdf5_write_array(f, "density", density);
+    CosmoTool::hdf5_write_array(f, "density_mp", density_mp);
+  } catch (const H5::FileIException &) {
+    // The guarded code creates and writes cic.h5; report that operation.
+    Console::instance().print<LOG_ERROR>(
+        "Failed to write cic.h5 in the current directory. Check that the "
+        "directory is writable.");
+    doneMPI();
+    return 1;
+  }
+
+  // Shut MPI down on the normal exit path, like the other tests do.
+  doneMPI();
+  return 0;
+}
--- a/libLSS/tests/test_cic_adjoint.cpp
+++ b/libLSS/tests/test_cic_adjoint.cpp
@ -0,0 +1,158 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_cic_adjoint.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include <boost/multi_array.hpp>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/uninitialized_type.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/physics/classic_cic.hpp"
+//#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/physics/modified_ngp.hpp"
+#include "libLSS/physics/modified_ngp_smooth.hpp"
+#include "libLSS/physics/cosmo.hpp"
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/hdf5_error.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include <boost/chrono.hpp>
+
+// Particle layout switch read by main(): with RANDOM_ACCESS defined the
+// particles get random positions and velocities; comment the define out to
+// place them on a regular lattice with a constant velocity instead.
+#define RANDOM_ACCESS
+
+using namespace LibLSS;
+using CosmoTool::cube;
+
+// Short names for the two projection kernels compared by this test.
+typedef ClassicCloudInCell<double> CIC;
+typedef ModifiedNGP<double, NGPGrid::CIC> MNGP;
+
+/**
+ * Compares the classic cloud-in-cell projection with the modified-NGP one.
+ *
+ * Np = Np_g^3 particles are placed in a box of side L (random positions when
+ * RANDOM_ACCESS is defined, a regular lattice otherwise) and projected onto
+ * an N^3 grid with both kernels; the adjoint of each projection is evaluated
+ * as well. The wall-clock time of the two projections is printed and the
+ * densities and adjoints are written to "cic.h5" in the current directory.
+ *
+ * Returns 0 on success, 1 if HDF5 reports a file error while writing.
+ */
+int main(int argc, char **argv) {
+  StaticInit::execute();
+  setupMPI(argc, argv);
+
+  // Fiducial parameter set; none of the projection calls below reads it.
+  CosmologicalParameters cosmo;
+
+  cosmo.omega_m = 0.30;
+  cosmo.omega_b = 0.045;
+  cosmo.omega_q = 0.70;
+  cosmo.w = -1;
+  cosmo.n_s = 0.97;
+  cosmo.sigma8 = 0.8;
+  cosmo.h = 0.68;
+  cosmo.a0 = 1.0;
+
+  Console::instance().setVerboseLevel<LOG_DEBUG>();
+
+  double L = 1.0; // box side
+  int N = 64;     // grid cells per dimension
+  int Np_g = 64;  // particles per dimension
+  int Np = cube(Np_g);
+  typedef UninitializedArray<boost::multi_array<double, 3>> U_Density;
+  typedef UninitializedArray<boost::multi_array<double, 4>> U_Velocity;
+  typedef UninitializedArray<boost::multi_array<double, 2>> U_Particles;
+  U_Density density_p(boost::extents[N][N][N]);
+  U_Velocity velocity_p(boost::extents[3][N][N][N]);
+
+  U_Density density_mngp_p(boost::extents[N][N][N]);
+  U_Particles particles_p(boost::extents[Np][3]);
+  U_Particles velocities_p(boost::extents[Np][3]);
+  U_Particles adjoint_p(boost::extents[Np][3]);
+  U_Particles adjoint_mngp_p(boost::extents[Np][3]);
+  U_Density::array_type &density = density_p.get_array();
+  // Only referenced by the disabled velocity-binning test further down.
+  U_Velocity::array_type &velocity = velocity_p.get_array();
+
+  U_Density::array_type &density_mngp = density_mngp_p.get_array();
+  U_Particles::array_type &particles = particles_p.get_array();
+  U_Particles::array_type &velocities = velocities_p.get_array();
+  U_Particles::array_type &adjoint = adjoint_p.get_array();
+  U_Particles::array_type &adjoint_mngp = adjoint_mngp_p.get_array();
+
+#ifdef RANDOM_ACCESS
+  RandomNumberThreaded<GSL_RandomNumber> rgen(-1);
+
+  // Random positions in [0, L) scaled from uniform draws, random velocities
+  // scaled by 100.
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    particles[i][0] = L * rgen.uniform();
+    particles[i][1] = L * rgen.uniform();
+    particles[i][2] = L * rgen.uniform();
+
+    velocities[i][0] = 100. * rgen.uniform();
+    velocities[i][1] = 100. * rgen.uniform();
+    velocities[i][2] = 100. * rgen.uniform();
+  }
+#else
+
+  // Regular lattice: the flat index i is unpacked into (ix, iy, iz) with iz
+  // running fastest.
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    int iz = (i % Np_g);
+    int iy = ((i / Np_g) % Np_g);
+    int ix = ((i / Np_g / Np_g));
+    particles[i][0] = L / Np_g * ix;
+    particles[i][1] = L / Np_g * iy;
+    particles[i][2] = L / Np_g * iz;
+
+    velocities[i][0] = 100.;
+    velocities[i][1] = 100.;
+    velocities[i][2] = 100.;
+  }
+
+#endif
+  Console::instance().print<LOG_INFO>("Clearing and projecting");
+  array::fill(density, 0);
+  array::fill(density_mngp, 0);
+
+  using namespace boost::chrono;
+  system_clock::time_point start_classic, end_classic, start_mp, end_mp;
+
+  // Only the projections are timed; the adjoints run outside the stopwatch.
+  start_classic = system_clock::now();
+  CIC::projection(particles, density, L, L, L, N, N, N);
+  end_classic = system_clock::now();
+
+  CIC::adjoint(particles, density, adjoint, L, L, L, N, N, N, 1.0);
+
+  //test velocity binning
+  //start_classic = system_clock::now();
+  //CIC::projection(particles,velocity,velocities,L, L, L, N, N, N);
+  //end_classic = system_clock::now();
+
+  //CIC::adjoint(particles, density, adjoint, L, L, L, N, N, N, 1.0);
+
+  start_mp = system_clock::now();
+  MNGP::projection(particles, density_mngp, L, L, L, N, N, N);
+  end_mp = system_clock::now();
+
+  MNGP::adjoint(particles, density_mngp, adjoint_mngp, L, L, L, N, N, N, 1.0);
+
+  duration<double> elapsed_classic = end_classic - start_classic;
+  duration<double> elapsed_mp = end_mp - start_mp;
+
+  std::cout << "MNGP: " << elapsed_mp << "  Classic: " << elapsed_classic
+            << std::endl;
+
+  try {
+    H5::H5File f("cic.h5", H5F_ACC_TRUNC);
+    CosmoTool::hdf5_write_array(f, "density", density);
+    CosmoTool::hdf5_write_array(f, "density_mngp", density_mngp);
+    CosmoTool::hdf5_write_array(f, "adjoint", adjoint);
+    CosmoTool::hdf5_write_array(f, "adjoint_mngp", adjoint_mngp);
+  } catch (const H5::FileIException &) {
+    // This block creates and writes cic.h5; it never reads a reference file.
+    Console::instance().print<LOG_ERROR>(
+        "Failed to write cic.h5 in the current directory.");
+    return 1;
+  }
+
+  return 0;
+}
--- a/libLSS/tests/test_class_interface.cpp
+++ b/libLSS/tests/test_class_interface.cpp
@ -0,0 +1,61 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_class_interface.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/physics/class_cosmo.hpp"
+
+using namespace LibLSS;
+
+/**
+ * Builds a ClassCosmo instance for a fixed parameter set and tabulates the
+ * values returned by primordial_Pk() and get_Tk() on 100 logarithmically
+ * spaced wavenumbers between 1e-6 and 1 into "interpolate_Tk.txt"
+ * (one line per sample, columns: k Pk Tk).
+ *
+ * Returns 0 on success, 1 if the output file cannot be opened.
+ */
+int main(int argc, char **argv) {
+  setupMPI(argc, argv);
+  StaticInit::execute();
+  CosmologicalParameters params;
+
+  params.omega_r = 0.0;
+  params.omega_k = 0.0;
+  params.omega_m = 0.30;
+  params.omega_q = 0.70;
+  params.omega_b = 0.049;
+  params.w = -1;
+  params.n_s = 1.0;
+  params.fnl = 0;
+  params.wprime = 0;
+  params.sigma8 = 0.8;
+  params.h = 0.8;
+  params.a0 = 1.0;
+  params.sum_mnu = 0.1; // in eV
+
+  ClassCosmo cc(params);
+
+  // Output the primordial power spectrum and the transfer function.
+
+  int Nbin = 100;
+
+  // The sampling bounds are exponents: k runs from 10^-6 to 10^0.
+  double log10_kmin = -6;
+  double log10_kmax = 0.;
+  double dlog10_k = (log10_kmax - log10_kmin) / (Nbin - 1);
+
+  std::ofstream f("interpolate_Tk.txt");
+  if (!f) {
+    // Without this check a failed open would silently produce no table.
+    std::cerr << "Cannot open interpolate_Tk.txt for writing" << std::endl;
+    StaticInit::finalize();
+    doneMPI();
+    return 1;
+  }
+
+  for (int i = 0; i < Nbin; i++) {
+    double k = std::pow(10.0, log10_kmin + dlog10_k * i);
+
+    double Pk = cc.primordial_Pk(k);
+    double Tk = cc.get_Tk(k);
+
+    f << k << " " << Pk << " " << Tk << std::endl;
+  }
+
+  StaticInit::finalize();
+  doneMPI();
+  return 0;
+}
--- a/libLSS/tests/test_console.cpp
+++ b/libLSS/tests/test_console.cpp
@ -0,0 +1,77 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_console.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/timing_db.hpp"
+#include "libLSS/tools/hdf5_error.hpp"
+
+using namespace std;
+using LibLSS::LOG_STD;
+using LibLSS::LOG_WARNING;
+using LibLSS::LOG_ERROR;
+using boost::format;
+
+// Callback handed to the static-initialisation registry below; it only
+// announces itself on standard output.
+// NOTE(review): presumably invoked when main() calls StaticInit::execute().
+static void funInit()
+{
+    std::cout << "Dummy static init test" << std::endl;
+}
+
+// File-scope registration object for funInit.
+LibLSS::RegisterStaticInit test_reg(funInit);
+
+int main(int argc, char **argv)
+{
+    LibLSS::MPI_Communication *mpi_world = LibLSS::setupMPI(argc, argv);
+
+    LibLSS::StaticInit::execute();
+    LibLSS::Console& console = LibLSS::Console::instance();
+
+    unlink("timings.h5");
+    {
+    H5::H5File f("timings.h5", H5F_ACC_TRUNC);
+    LibLSS::timings::load(f);
+    }
+
+    console.print<LOG_STD>("Test console");
+    console.print<LOG_WARNING>("Test warning console");
+    console.print<LOG_ERROR>("Test error console");
+
+    LibLSS::Progress<LOG_STD>& p = console.start_progress<LOG_STD>("Test progress", 10);
+
+    console.indent();
+    console.print<LOG_STD>("test indent");
+
+    for (int j = 0; j < 10; j++)
+    {
+        p.update(j);
+        console.print<LOG_STD>("indented");
+        console.indent();
+    }
+    p.destroy();
+    
+    console.print<LOG_STD>(format("This is a formatter test %d, %g") % -2 % 4.3);
+    console.format<LOG_STD>("This is a formatter test2 %d, %g", -2, 4.3);
+
+    {
+      LIBLSS_AUTO_CONTEXT(LOG_STD, ctx);
+      ctx.print("Now in context");
+    }
+
+    {
+    H5::H5File f("timings.h5", H5F_ACC_TRUNC);
+    LibLSS::timings::save(f);
+    }
+
+    console.print_stack_trace();
+
+    LibLSS::StaticInit::finalize();
+    return 0;
+}
--- a/libLSS/tests/test_cosmo_expansion.cpp
+++ b/libLSS/tests/test_cosmo_expansion.cpp
@ -0,0 +1,75 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_cosmo_expansion.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/physics/cosmo.hpp"
+
+using namespace LibLSS;
+
+/**
+ * Consistency checks for the Cosmology class on 101 redshifts in [0, 1].
+ *
+ *  1. After precompute_com2a(), the conversion chain
+ *     z2a -> a2com -> com2comph -> comph2com -> com2a -> a2z
+ *     must give back the starting redshift to within 1e-5.
+ *  2. d_plus() of an instance on which precompute_d_plus() was called must
+ *     agree to within 1e-5 with a second instance on which it was not.
+ *
+ * Every sample is printed; the program returns 1 at the first failing
+ * sample and 0 when all of them pass.
+ */
+int main(int argc, char **argv) {
+  setupMPI(argc, argv);
+  StaticInit::execute();
+  CosmologicalParameters params;
+
+  params.omega_r = 0;
+  params.omega_k = 0;
+  params.omega_m = 0.10;
+  params.omega_b = 0.049;
+  params.omega_q = 0.90;
+  params.w = -1;
+  params.wprime = 0;
+  params.n_s = 1;
+  params.sigma8 = 0.8;
+  params.rsmooth = 0;
+  params.h = 0.7;
+  params.beta = 0;
+  params.z0 = 0;
+  params.a0 = 1;
+
+  Cosmology cosmo(params);
+  Cosmology cosmo2(params);
+
+  // Check 1: redshift round trip through the precomputed com2a table.
+  cosmo.precompute_com2a();
+  for (int i = 0; i <= 100; i++) {
+    double z = i / 100.;
+    double d = cosmo.com2comph(cosmo.a2com(cosmo.z2a(z)));
+    double znew = cosmo.a2z(cosmo.com2a(cosmo.comph2com(d)));
+    bool pass = std::abs(z - znew) < 1e-5;
+
+    std::cout << z << " " << znew << " " << d << " " << pass << std::endl;
+    if (!pass)
+      return 1;
+  }
+
+  // Check 2: growth factor with and without the precomputed table.
+  cosmo.precompute_d_plus();
+  {
+    double Dtest = cosmo.d_plus(0.7);
+    double Dtest2 = cosmo2.d_plus(0.7);
+    // The separator keeps the two numbers from running into each other.
+    std::cout << Dtest << " " << Dtest2 << std::endl;
+  }
+  for (int i = 0; i <= 100; i++) {
+    double z = i / 100.;
+    double D = cosmo.d_plus(cosmo.z2a(z));
+    double D2 = cosmo2.d_plus(cosmo2.z2a(z));
+    bool pass = std::abs(D - D2) < 1e-5;
+    std::cout << z << " " << D << " " << D2 << " " << pass << std::endl;
+    if (!pass)
+      return 1;
+  }
+
+  StaticInit::finalize();
+  doneMPI();
+  return 0;
+}
--- a/libLSS/tests/test_cosmo_expansion.cpp.expected
+++ b/libLSS/tests/test_cosmo_expansion.cpp.expected
@ -0,0 +1,101 @@
+0 -0 1
+0.01 29.9566 1
+0.02 59.8676 1
+0.03 89.7322 1
+0.04 119.55 1
+0.05 149.32 1
+0.06 179.041 1
+0.07 208.713 1
+0.08 238.336 1
+0.09 267.908 1
+0.1 297.429 1
+0.11 326.898 1
+0.12 356.314 1
+0.13 385.678 1
+0.14 414.987 1
+0.15 444.243 1
+0.16 473.443 1
+0.17 502.587 1
+0.18 531.675 1
+0.19 560.706 1
+0.2 589.679 1
+0.21 618.594 1
+0.22 647.45 1
+0.23 676.246 1
+0.24 704.983 1
+0.25 733.659 1
+0.26 762.273 1
+0.27 790.826 1
+0.28 819.316 1
+0.29 847.743 1
+0.3 876.106 1
+0.31 904.406 1
+0.32 932.64 1
+0.33 960.81 1
+0.34 988.913 1
+0.35 1016.95 1
+0.36 1044.92 1
+0.37 1072.82 1
+0.38 1100.66 1
+0.39 1128.43 1
+0.4 1156.13 1
+0.41 1183.75 1
+0.42 1211.31 1
+0.43 1238.8 1
+0.44 1266.22 1
+0.45 1293.57 1
+0.46 1320.85 1
+0.47 1348.05 1
+0.48 1375.18 1
+0.49 1402.24 1
+0.5 1429.23 1
+0.51 1456.14 1
+0.52 1482.98 1
+0.53 1509.74 1
+0.54 1536.43 1
+0.55 1563.05 1
+0.56 1589.59 1
+0.57 1616.05 1
+0.58 1642.44 1
+0.59 1668.75 1
+0.6 1694.99 1
+0.61 1721.14 1
+0.62 1747.23 1
+0.63 1773.23 1
+0.64 1799.16 1
+0.65 1825.01 1
+0.66 1850.78 1
+0.67 1876.47 1
+0.68 1902.08 1
+0.69 1927.62 1
+0.7 1953.07 1
+0.71 1978.45 1
+0.72 2003.75 1
+0.73 2028.96 1
+0.74 2054.1 1
+0.75 2079.16 1
+0.76 2104.14 1
+0.77 2129.04 1
+0.78 2153.85 1
+0.79 2178.59 1
+0.8 2203.25 1
+0.81 2227.82 1
+0.82 2252.32 1
+0.83 2276.73 1
+0.84 2301.06 1
+0.85 2325.32 1
+0.86 2349.49 1
+0.87 2373.58 1
+0.88 2397.59 1
+0.89 2421.51 1
+0.9 2445.36 1
+0.91 2469.13 1
+0.92 2492.81 1
+0.93 2516.41 1
+0.94 2539.93 1
+0.95 2563.37 1
+0.96 2586.73 1
+0.97 2610.01 1
+0.98 2633.21 1
+0.99 2656.32 1
+1 2679.35 1
--- a/libLSS/tests/test_cpu_feature.cpp
+++ b/libLSS/tests/test_cpu_feature.cpp
@ -0,0 +1,11 @@
+#include <string>
+#include <iostream>
+#include "libLSS/tools/cpu/feature_check.hpp"
+
+// Calls LibLSS::check_compatibility() with an empty string and prints that
+// string afterwards. The function's return value, if any, is ignored.
+int main()
+{
+	std::string report;
+
+	LibLSS::check_compatibility(report);
+	std::cout << report << std::endl;
+
+	return 0;
+}
--- a/libLSS/tests/test_fuse_wrapper.cpp
+++ b/libLSS/tests/test_fuse_wrapper.cpp
@ -0,0 +1,146 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_fuse_wrapper.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include <boost/multi_array.hpp>
+#include <boost/timer/timer.hpp>
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/fusewrapper.hpp"
+#include "libLSS/tools/static_init.hpp"
+
+// Returns 1, 2, 3, ... on successive calls; the count lives in a
+// function-local static.
+double fun() {
+  static int calls = 0;
+  return ++calls;
+}
+
+using namespace boost::timer;
+
+// Exercises the fwrap()/fused-array expression layer on two 256^3 arrays.
+// Reductions obtained through the wrappers are printed next to hand-written
+// loops, and several expressions are timed with boost::timer::cpu_timer.
+// Nothing is asserted: the output is meant for inspection and the program
+// always returns 0.
+int main() {
+  LibLSS::StaticInit::execute();
+  using boost::extents;
+  using boost::multi_array;
+  using LibLSS::_p1;
+  using LibLSS::_p2;
+  using LibLSS::b_fused_idx;
+  using LibLSS::b_va_fused;
+  using LibLSS::fwrap;
+
+  size_t N = 256;
+  multi_array<double, 3> A(extents[N][N][N]);
+  multi_array<double, 3> B(extents[N][N][N]);
+
+  // fA wraps A. fC is never read again in this function.
+  // NOTE(review): fD is built before A is filled; presumably it is evaluated
+  // lazily when summed further down — confirm.
+  auto fA = fwrap(A);
+  auto fC = fwrap(fA.fautowrap(fun));
+  auto fD = LibLSS::b_fused<double>(A, 2.0 * M_PI * _p1);
+
+  // Initialize A with some linear space.
+  fA = b_fused_idx<double, 3>([N](int i, int j, int k) -> double {
+    return double(i) / N + double(j) / N + double(k) / N;
+  });
+
+  // Sum of A through the library reduction ...
+  std::cout << "Reference: " << LibLSS::reduce_sum<double>(A) << std::endl;
+
+  // ... and through a plain triple loop, for comparison.
+  {
+    double r = 0;
+    for (int i = 0; i < N; i++)
+      for (int j = 0; j < N; j++)
+        for (int k = 0; k < N; k++)
+          r += A[i][j][k];
+    std::cout << "Manual: " << r << std::endl;
+  }
+
+  // Time ten evaluations of an arithmetic expression followed by a sum.
+  {
+    cpu_timer timer;
+    double r = 0;
+    for (int i = 0; i < 10; i++)
+      r += ((fA * 2. + 5.) / 7).sum();
+    std::cout << "10 composite multiply, sum and reduce:" << timer.format()
+              << " " << r << std::endl;
+  }
+  // Create a lazy expression.
+  auto fB = std::cos(fA * (2 * M_PI));
+  // Same expression with the scalar on the left-hand side.
+  // WORKS PARTIALLY: shapeness must be better computed
+  auto fB2 = std::cos((2 * M_PI) * fA);
+
+  std::cout << fwrap(fD).sum() << std::endl;
+
+  // This does a full collapse of the expression, including the squaring
+
+  {
+    cpu_timer timer;
+    std::cout << (LibLSS::ipow<2>(fB)).sum() / LibLSS::ipow<3>(N) << std::endl;
+    std::cout << "Composite multiply, cos, square and reduce:" << timer.format()
+              << std::endl;
+    std::cout << (LibLSS::ipow<2>(fB2)).sum() / LibLSS::ipow<3>(N) << std::endl;
+  }
+
+  {
+    cpu_timer timer;
+    std::cout << std::abs(fB).sum() / LibLSS::ipow<3>(N) << std::endl;
+    std::cout << "Composite multiply, cos, abs and reduce:" << timer.format()
+              << std::endl;
+  }
+
+  //std::cout << fB->shape()[0] << std::endl;
+
+  // Assign the cos part
+  auto fE = fwrap(B);
+  {
+    cpu_timer timer;
+    fE = fB;
+    std::cout << "Composite multiply, cos and assign:" << timer.format()
+              << std::endl;
+  }
+
+  {
+    cpu_timer timer;
+    std::cout << (fE * fE).sum() << std::endl;
+    std::cout << "Composite square and reduce:" << timer.format() << std::endl;
+  }
+
+  // Power, minimum and maximum reductions of |fE|.
+  std::cout << std::pow(std::abs(fE), 2.5).sum()
+            << std::endl;
+  std::cout << (std::abs(fE)).min()
+            << std::endl;
+  std::cout << (std::abs(fE)).max()
+            << std::endl;
+  // Hand-written minimum of |fE|, printed for comparison with min() above.
+  double r = std::numeric_limits<double>::infinity();
+  for (size_t i = 0; i < N; i++)
+    for (size_t j = 0; j < N; j++)
+      for (size_t k = 0; k < N; k++)
+        r = std::min(r, std::abs((*fE)[i][j][k]));
+
+  std::cout << r << std::endl;
+
+  // Assignment between two temporary wrappers, plain and negated.
+  fwrap(B) = fwrap(A);
+
+
+  fwrap(B) = -fwrap(A);
+
+  std::cout << fwrap(B).sum() << " " << fwrap(A).sum() << std::endl;
+
+  std::cout << fwrap(B).no_parallel().sum() << std::endl;
+
+  // Real part of a complex array, summed through the wrapper.
+  multi_array<std::complex<double>, 3> c_B(extents[N][N][N]);
+   auto f_c_B = fwrap(c_B);
+double x = std::real(f_c_B).sum();
+   std::cout << x << std::endl;
+
+
+  // Built but never read afterwards.
+  auto c_a = LibLSS::make_complex(fwrap(A), fwrap(B));
+
+  // Disabled experiment with scalar wrappers:
+  //double sB;
+  //auto scalar_A = fwrap(1.0);
+  //auto scalar_B = fwrap(sB);
+
+  //scalar_B = scalar_A + 2;
+
+  return 0; //fA.sum();
+}
--- a/libLSS/tests/test_fused_array.cpp
+++ b/libLSS/tests/test_fused_array.cpp
@ -0,0 +1,234 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_fused_array.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/tools/phoenix_vars.hpp"
+#include <boost/phoenix/operator.hpp>
+#include <boost/format.hpp>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/fused_masked_assign.hpp"
+#include "libLSS/tools/fused_reduce.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include <boost/chrono.hpp>
+
+using namespace std;
+using namespace LibLSS;
+
+// Scope timer: records the time at construction and, when destroyed, prints
+// "Done <label>  in <elapsed>" on standard output.
+class TimeContext {
+protected:
+    std::string code;                                      // label printed at the end
+    boost::chrono::system_clock::time_point start_context; // construction time
+public:
+    TimeContext(const std::string& code_name)
+        : code(code_name),
+          start_context(boost::chrono::system_clock::now())
+    {}
+
+    ~TimeContext() {
+        const boost::chrono::system_clock::time_point stop =
+            boost::chrono::system_clock::now();
+        boost::chrono::duration<double> ctx_time = stop - start_context;
+        std::cout << boost::format("Done %s  in %s")  % code % ctx_time << std::endl;
+    }
+};
+
+// Prints a[0] ... a[num_elements()-1], one value per line, on standard
+// output. `a` must provide num_elements() and a single-subscript operator[]
+// whose result can be streamed.
+template<typename A>
+void printer(const A& a)
+{
+    // Unsigned index: matches the element count's signedness and cannot
+    // overflow the way an int would on very large arrays.
+    const std::size_t count = a.num_elements();
+    for (std::size_t i = 0; i < count; i++)
+        std::cout << a[i] << std::endl;
+}
+
+// Function object returning twice its integer argument.
+struct MulOp {
+    int operator()(const int& a) const { return a * 2; }
+};
+
+// Hand-written reference loop nest: a[i][j][k] = op(b[i][j][k]) for every
+// index inside the shape of `a`. The three loops are collapsed into a single
+// OpenMP work-sharing loop.
+template<typename T, typename T2, typename Operation>
+void hand_coded(T& a, const T2& b, Operation op)
+{
+  const size_t n0 = a.shape()[0];
+  const size_t n1 = a.shape()[1];
+  const size_t n2 = a.shape()[2];
+
+#pragma omp parallel for collapse(3)
+  for (size_t i = 0; i < n0; i++)
+    for (size_t j = 0; j < n1; j++)
+      for (size_t k = 0; k < n2; k++)
+        a[i][j][k] = op(b[i][j][k]);
+}
+
+// Hand-written reference loop nest: stores `value` into every element of
+// the three-dimensional array `a`, parallelised over the first index.
+template<typename T, typename T2>
+void hand_constant(T& a, T2 value)
+{
+  const size_t n0 = a.shape()[0];
+  const size_t n1 = a.shape()[1];
+  const size_t n2 = a.shape()[2];
+
+#pragma omp parallel for
+  for (size_t i = 0; i < n0; i++)
+    for (size_t j = 0; j < n1; j++)
+      for (size_t k = 0; k < n2; k++)
+        a[i][j][k] = value;
+}
+
+// Encodes an index triple as a + 10*b + 100*c. The sum is formed in integer
+// arithmetic and returned as a double.
+double op0(int a, int b, int c)
+{
+    const int encoded = a + 10*b + 100*c;
+    return encoded;
+}
+
+// Benchmarks and sanity checks for the fused-array helpers on 128^3 arrays.
+// Timed sections print their duration through TimeContext; the correctness
+// checks call abort() on the first mismatch. The masked-copy and masked-sum
+// sections at the end only print values for visual comparison.
+// Returns 0 when no check aborted.
+int main(int argc, char **argv)
+{
+    setupMPI(argc, argv);
+    StaticInit::execute();
+    using boost::lambda::_1;
+    using boost::lambda::_2;
+    using boost::lambda::constant;
+    namespace Larray = ::LibLSS::array;
+
+    const std::size_t N = 128;
+    // Array and DArray currently name the same type.
+    typedef boost::multi_array<double,3> Array;
+    Array::index_gen indices;
+    typedef boost::multi_array<double,3> DArray;
+    Array a(boost::extents[N][N][N]),
+        b(boost::extents[N][N][N]),
+        c(boost::extents[N][N][N]);
+    DArray d(boost::extents[N][N][N]);
+
+    // a holds the flat element index, b its square.
+    for (size_t i = 0; i < a.num_elements(); i++) {
+       a.data()[i] = i;
+       b.data()[i] = i*i;
+    }
+    // Disabled benchmarks.
+#if 0
+    {
+        TimeContext ctx("Constant");
+        for (int j = 0; j < 100; j++)
+            copy_array(c, b_fused<int,3>(constant(2)));
+    }
+
+    {
+        TimeContext ctx("Hand coded Constant");
+        for (int j = 0; j < 100; j++)
+            hand_constant(c, 2);
+    }
+
+
+    {
+        TimeContext ctx("MulOp");
+       for (int j = 0; j < 10000; j++)
+            copy_array(c, b_fused<int>(b,MulOp()));
+    }
+    #endif
+    // c <- 2*b through the two fused front-ends, 1000 repetitions each.
+    {
+        TimeContext ctx("Lambda");
+        for (int j = 0; j < 1000; j++)
+            copy_array(c, b_fused<int>(b,2*_p1));
+    }
+    {
+        TimeContext ctx("Lambda va");
+        for (int j = 0; j < 1000; j++)
+            copy_array(c, b_va_fused<int>(2*_p1, b));
+    }
+    // Same operation restricted to the index range [1, N/2) in every
+    // dimension; this variant uses the boost::lambda placeholder _1.
+    {
+        TimeContext ctx("Lambda va on sliced array");
+        auto slicer = indices[Array::index_range(1,N/2)][Array::index_range(1,N/2)][Array::index_range(1,N/2)];
+        for (int j = 0; j < 1000; j++) {
+            auto va = b_va_fused<int>(2*_1, b[slicer]);
+            copy_array_rv(c[slicer], va);
+        }
+    }
+    // NOTE(review): the meaning of copy_array's third argument (true) is not
+    // visible here — confirm against its declaration.
+    {
+        TimeContext ctx("Float Lambda");
+        for (int j = 0; j < 1000; j++)
+            copy_array(d, b_va_fused<double>(2*_p1,b), true);
+    }
+
+    // fill() must set every element of b to 103.
+    Larray::fill(b, 103);
+    for (size_t i = 0; i < b.num_elements(); i++)
+      if (b.data()[i] != 103) {
+        cout << "At element " << i << " b = " << b.data()[i]  << endl;
+        abort();
+      }
+
+    // c <- 2*b with the hand-written loop nest ...
+    {
+        TimeContext ctx("hand coded lambda");
+        for (int j = 0; j < 1000; j++)
+            hand_coded(c, b, 2*_p1);
+    }
+    // ... which the copy-then-scale result in a must reproduce exactly.
+    Larray::copyArray3d(a, b);
+    Larray::scaleArray3d(a, 2);
+    for (size_t i = 0; i < a.num_elements(); i++)
+      if (a.data()[i] != c.data()[i]) {
+        cout << "At element " << i << " a = " << a.data()[i] << " c = " << c.data()[i]  << endl;
+        abort();
+      }
+
+    {
+        // This d (2 x N x N) shadows the outer d for the rest of the block.
+        Array d(boost::extents[2][N][N]);
+        copy_array_rv(d[0], b[0]);
+        copy_array_rv(b[0], d[0]);
+        // Fill b from its indices and verify b[i][j][k] == i + 10*j + 100*k.
+        copy_array(b, b_fused_idx<double, 3>(op0));
+        for (size_t i = 0; i < N; i++)
+            for (size_t j = 0; j < N; j++)
+                for (size_t k = 0; k < N; k++)
+                    if (size_t(b[i][j][k]) != (i+10*j+100*k)) {
+                        cout << "Problem at (" << i << "," << j << "," << k << ")" << endl;
+                        cout << "Value in b is " << b[i][j][k] << endl;
+                        abort();
+                    }
+
+        // Copy b[1..3)[1..3)[1..3) into d[all][1..3)[1..3); the first index
+        // is therefore shifted by one, which the check below accounts for.
+        copy_array_rv(
+            d[indices[Array::index_range()][Array::index_range(1,3)][Array::index_range(1,3)]],
+            b[indices[Array::index_range(1,3)][Array::index_range(1,3)][Array::index_range(1,3)]]
+        );
+        for (long i = 0; i < 2; i++) {
+            for (long j = 1; j < 3; j++) {
+                for (long k = 1; k < 3; k++) {
+                    if (d[i][j][k] != (i+1) + 10*j + 100*k) {
+                        cout << "Problem(2) at " << i << "," << j << "," << k << endl;
+                        abort();
+                    }
+                }
+            }
+
+        }
+    }
+
+
+    {
+      // b <- 4*i/N in integer arithmetic, i.e. values 0..3 along the first axis.
+      copy_array(b, b_fused_idx<int, 3>([N](int i, int j, int k)->double {
+        return 4*i/N;
+      }));
+      // Masked copy into a with mask b > 2. The printed sums are to be
+      // compared by eye with the value s computed by the loop below.
+      // NOTE(review): the loop suggests a = b where the mask holds and 2*b
+      // elsewhere — confirm against copy_array_masked.
+      copy_array_masked(a, b, b_va_fused<int>(2*_p1, b), b_va_fused<bool>(_p1 > 2, b));
+      std::cout << reduce_sum<int>(a) << " " << reduce_sum<int>(b) << std::endl;
+
+      double s= 0;
+      for (int i = 0; i < N; i++)
+        for (int j = 0; j < N; j++)
+          for (int k = 0; k < N; k++)
+            if (b[i][j][k] > 2)
+              s += b[i][j][k];
+            else
+              s += 2*b[i][j][k];
+
+      std::cout << s << std::endl;
+    }
+
+    {
+      // Sum of a over the cells where b > 2: manual loop versus the masked
+      // reduce_sum, printed side by side.
+      long s = 0;
+      for (int i = 0; i < N; i++)
+        for (int j = 0; j < N; j++)
+          for (int k = 0; k < N; k++)
+            if (b[i][j][k] > 2)  s += a[i][j][k];
+      std::cout << s << " " << reduce_sum<int>(a, b_va_fused<bool>(_p1 > 2, b)) << std::endl;
+    }
+
+
+    return 0;
+}
--- a/libLSS/tests/test_fused_cond.cpp
+++ b/libLSS/tests/test_fused_cond.cpp
@ -0,0 +1,130 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_fused_cond.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/tools/phoenix_vars.hpp"
+#include <boost/phoenix/operator.hpp>
+#include <boost/format.hpp>
+//#include "libLSS/mpi/generic_mpi.hpp"
+//#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/fused_reduce.hpp"
+#include "libLSS/tools/fused_cond.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include <boost/chrono.hpp>
+
+using namespace std;
+using namespace LibLSS;
+
+
+// Scope timer: stores the creation time and, on destruction, reports
+// "Done <label>  in <elapsed>" on standard output.
+class TimeContext {
+protected:
+    std::string code;                                      // label of the timed section
+    boost::chrono::system_clock::time_point start_context; // time of construction
+public:
+    TimeContext(const std::string& code_name)
+        : code(code_name),
+          start_context(boost::chrono::system_clock::now())
+    {}
+
+    ~TimeContext() {
+        const boost::chrono::system_clock::time_point finished =
+            boost::chrono::system_clock::now();
+        boost::chrono::duration<double> ctx_time = finished - start_context;
+        std::cout << boost::format("Done %s  in %s")  % code % ctx_time << std::endl;
+    }
+};
+
+
+// Benchmark of b_cond_fused on 8192-element arrays. The same conditional
+// copy is timed four ways, one million repetitions each: from concrete
+// arrays ("Automatic"), with an explicit loop ("Hand written"), from named
+// fused expressions ("Inline") and from temporaries built in place
+// ("Inline 2"). Finally the sum of the "Inline" expression is printed.
+// Nothing is asserted; the program always returns 0.
+int main()
+{
+  static constexpr int N = 8192;
+  boost::multi_array<bool, 1> amask(boost::extents[N]);
+  boost::multi_array<double, 1> A(boost::extents[N]);
+  boost::multi_array<double, 1> B(boost::extents[N]);
+  boost::multi_array<double, 1> C(boost::extents[N]);
+  
+  // Index-driven expressions: mask is true at even indices, a0 = -2*i,
+  // b0 = 3*i.
+  auto mask = b_fused_idx<bool, 1>([](int i)->bool { return (i%2)==0; }, boost::extents[N]);
+  auto a0 =  b_fused_idx<double, 1>(
+      [](int i)->int { return -2*i; },
+      boost::extents[N]
+    );
+  auto b0 =  b_fused_idx<double, 1>(
+      [](int i)->int { return 3*i; },
+      boost::extents[N]
+    );
+
+  // Store the three expressions into concrete arrays.
+  LibLSS::copy_array(A, a0);
+  LibLSS::copy_array(B, b0);
+  LibLSS::copy_array(amask, mask);
+
+
+  // Conditional expression over the concrete arrays.
+  // NOTE(review): the "Hand written" loop below spells out the presumed
+  // semantics (A where the mask is true, B otherwise) — confirm.
+  auto c = b_cond_fused<double>(
+    amask,
+    A,
+    B
+  );
+
+
+  {
+     TimeContext t("Automatic");
+     for (int j = 0; j < 1000000; j++)
+     LibLSS::copy_array(C, c);
+  }
+  {
+     TimeContext t("Hand written");
+     for (int j = 0; j < 1000000; j++)
+#pragma omp parallel for
+     for (int i = 0; i < N; i++)
+     {
+       if (amask[i])
+         C[i] = A[i];
+       else
+         C[i] = B[i];
+     }
+  }
+
+  // Same conditional built from the named index-driven expressions.
+  auto e = b_cond_fused<double>(mask, 
+    a0, b0
+  );
+  {
+     TimeContext t("Inline");
+     for (int j = 0; j < 1000000; j++)
+     LibLSS::copy_array(C, e);
+  }
+  
+  // Same conditional again, every operand being a temporary.
+  auto f = b_cond_fused<double>(
+    b_fused_idx<bool, 1>(
+      [](int i)->bool { return (i%2)==0; }, boost::extents[N]
+    ),
+    b_fused_idx<double, 1>(
+      [](int i)->int { return -2*i; },
+      boost::extents[N]
+    ), 
+    b_fused_idx<double, 1>(
+      [](int i)->int { return 3*i; },
+      boost::extents[N]
+    )
+  );
+  
+  {
+     TimeContext t("Inline 2");
+     for (int j = 0; j < 1000000; j++)
+       LibLSS::copy_array(C, f);
+  }
+/*
+  Disabled debugging output of the first 16 elements:
+
+  for (int i = 0; i < 16; i++)
+    std::cout  << C[i] << std::endl;
+
+  for (int i = 0; i < 16; i++)
+    std::cout  << e[i] << std::endl;
+ */
+
+  std::cout << reduce_sum<double>(e) << std::endl; 
+
+  return 0;
+}
--- a/libLSS/tests/test_gig.cpp
+++ b/libLSS/tests/test_gig.cpp
@ -0,0 +1,35 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_gig.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/samplers/core/random_number.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include "libLSS/samplers/core/gig_sampler.hpp"
+
+using std::cout;
+using std::endl;
+using namespace LibLSS;
+
+// Draws 100000 values from GIG_sampler_3params with fixed parameters
+// (a = 10, b = 5, p = -29) and prints one value per line.
+int main()
+{
+  double a = 10.;
+  double b = 5.;
+  double p = 1 - 30.;
+  const int num_samples = 100000;
+
+  GSL_RandomNumber rgen;
+
+  int drawn = 0;
+  while (drawn < num_samples) {
+    cout << GIG_sampler_3params(a, b, p, rgen) << endl;
+    ++drawn;
+  }
+
+  return 0;
+}
+
--- a/libLSS/tests/test_gradient_supersampling.cpp
+++ b/libLSS/tests/test_gradient_supersampling.cpp
@ -0,0 +1,262 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_gradient_supersampling.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include <boost/bind/bind.hpp>
+#include <complex>
+#include <boost/lambda/lambda.hpp>
+#include <boost/multi_array.hpp>
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include "libLSS/tools/fused_array.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/sigcatcher.hpp"
+#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include <CosmoTool/algo.hpp>
+
+using namespace LibLSS;
+using CosmoTool::square;
+using namespace std;
+using boost::bind;
+using boost::c_storage_order;
+using boost::ref;
+using CosmoTool::hdf5_write_array;
+using boost::lambda::constant;
+using boost::placeholders::_1;
+
+typedef FFTW_Manager_3d<double> Manager;
+typedef Manager::ArrayFourier F_Array;
+typedef Manager::ArrayReal R_Array;
+
+typedef UninitializedArray<R_Array, Manager::AllocReal> UR_Array;
+typedef UninitializedArray<F_Array, Manager::AllocComplex> UF_Array;
+
+static const double epsilon = 1e-9;
+
+// File-local registrations. Each RegisterStaticInit object is constructed with
+// an init routine, a cleanup routine, a numeric priority and a label.
+// NOTE(review): presumably these are run by StaticInit::execute() and
+// StaticInit::finalize() called from main -- confirm in static_init.hpp.
+namespace {
+#if defined(ARES_MPI_FFTW)
+    // Only registered when the build defines ARES_MPI_FFTW.
+    RegisterStaticInit reg0(fftw_mpi_init, fftw_mpi_cleanup, 9, "MPI/FFTW");
+#endif
+    // WISDOM must come at the end. Otherwise it is reset
+    RegisterStaticInit reg1(CosmoTool::init_fftw_wisdom, CosmoTool::save_fftw_wisdom, 12, "FFTW/WISDOM");
+}
+
+
+/**
+ * Draw one value from rgen.gaussian_ratio().
+ *
+ * @param rgen generator object; only its gaussian_ratio() member is used.
+ * @param fac  scaling factor kept in the signature but currently ignored
+ *             (the multiplication by fac is disabled).
+ * @return the value produced by rgen.gaussian_ratio(), unscaled.
+ */
+template<typename RGen>
+double rand_init(RGen& rgen, double fac)
+{
+  (void)fac; // deliberately unused
+  const double draw = rgen.gaussian_ratio();
+  return draw;
+}
+
+/**
+ * Constant generator: returns 1 on every call.
+ *
+ * @return always 1.
+ */
+static
+double filler()
+{
+  return 1;
+}
+
+/**
+ * Sum of squared elements of a 3d array over the planes
+ * [mgr.startN0, mgr.startN0 + mgr.localN0) of the first axis and the full
+ * extent of the two other axes.
+ *
+ * @param mgr manager providing startN0 and localN0.
+ * @param a   array indexed as a[i][j][k].
+ * @return the accumulated sum of CosmoTool::square(a[i][j][k]); this function
+ *         itself performs no communication.
+ */
+template<typename Array>
+typename Array::element chi2_sum(Manager& mgr, const Array& a)
+{
+  typedef typename Array::element value_type;
+
+  const long first_plane = mgr.startN0;
+  const long end_plane = mgr.startN0 + mgr.localN0;
+  const long extent1 = a.shape()[1];
+  const long extent2 = a.shape()[2];
+  value_type accum = 0;
+
+  for (long i = first_plane; i < end_plane; ++i) {
+    for (long j = 0; j < extent1; ++j) {
+      for (long k = 0; k < extent2; ++k) {
+        accum += CosmoTool::square(a[i][j][k]);
+      }
+    }
+  }
+
+  return accum;
+}
+
+
+/**
+ * Forward model of the test: upgrades the low-resolution modes `a` onto the
+ * high-resolution grid, adds `mu` element-wise, transforms to real space with
+ * `plan` and returns the sum of squares of the result, summed over all ranks.
+ *
+ * @param comm communicator used for the in-place MPI_SUM reduction.
+ * @param mgr  manager of the low-resolution grid.
+ * @param mgr2 manager of the high-resolution grid (owns the temporaries).
+ * @param plan complex-to-real plan executed through mgr2.
+ * @param a    low-resolution Fourier modes.
+ * @param mu   high-resolution Fourier modes added to the upgraded `a`.
+ * @return the reduced chi2 value.
+ */
+template<typename A,typename B>
+double forward_chi2(MPI_Communication *comm, Manager& mgr, Manager& mgr2, Manager::plan_type& plan, const A& a, const B& mu)
+{
+  using boost::lambda::_1;
+  using boost::lambda::_2;
+
+  // Scratch buffers on the high-resolution grid.
+  UF_Array hi_modes_p(mgr2.extents_complex(), mgr2.allocator_complex);
+  UR_Array hi_real_p(mgr2.extents_real(), mgr2.allocator_real);
+  UF_Array::array_type& hi_modes = hi_modes_p.get_array();
+  UR_Array::array_type& hi_real = hi_real_p.get_array();
+
+  LibLSS::array::fill(hi_modes, 0);
+  mgr2.upgrade_complex(mgr, a, hi_modes);
+  // hi_modes <- hi_modes + mu
+  copy_array(hi_modes, b_fused<std::complex<double> >(hi_modes, mu, _1+_2));
+  mgr2.execute_c2r(plan, hi_modes.data(), hi_real.data());
+
+  double chi2 = chi2_sum(mgr2, hi_real);
+  comm->all_reduce_t(MPI_IN_PLACE, &chi2, 1, MPI_SUM);
+
+  return chi2;
+}
+
+/**
+ * Gradient counterpart of forward_chi2.
+ *
+ * Builds the high-resolution sum (upgraded `a` plus `mu`), degrades it back to
+ * the low-resolution grid into `c`, copies the high-resolution sum into `d`,
+ * scales `c` by 4*N (N = mgr2.N0*mgr2.N1*mgr2.N2) and finally halves the
+ * entries of `c` whose three indices are each either 0 or half the grid size.
+ *
+ * @param mgr  manager of the low-resolution grid.
+ * @param mgr2 manager of the high-resolution grid.
+ * @param plan unused; kept so the signature stays unchanged for callers.
+ * @param a    low-resolution Fourier modes.
+ * @param mu   high-resolution Fourier modes added to the upgraded `a`.
+ * @param c    output: low-resolution array receiving the scaled result.
+ * @param d    output: high-resolution array receiving a copy of the sum.
+ */
+template<typename A, typename B>
+void gradient_chi2(Manager& mgr, Manager& mgr2, Manager::plan_type& plan, const A& a, B& mu, A& c, B& d)
+{
+  using boost::lambda::_1;
+  using boost::lambda::_2;
+  // Only one high-resolution scratch buffer is needed; the low-resolution
+  // buffer previously allocated here was never read or written.
+  UF_Array sum_hi(mgr2.extents_complex(), mgr2.allocator_complex);
+  long N = mgr2.N0 * mgr2.N1 * mgr2.N2;
+
+  LibLSS::array::fill(sum_hi.get_array(), 0);
+
+  mgr2.upgrade_complex(mgr, a, sum_hi.get_array());
+  // sum_hi <- sum_hi + mu
+  copy_array(sum_hi.get_array(), b_fused<std::complex<double> >(sum_hi.get_array(), mu, _1+_2));
+  mgr.degrade_complex(mgr2, sum_hi.get_array(), c);
+
+  LibLSS::array::copyArray3d(d, sum_hi.get_array());
+
+  LibLSS::array::scaleArray3d(c, 4*N);
+
+  // Halve the special entries of plane 0, if this rank holds it.
+  if (mgr.on_core(0)) {
+    c[0][0][0] /= 2;
+    c[0][mgr.N1/2][0] /= 2;
+    c[0][0][mgr.N2/2] /= 2;
+    c[0][mgr.N1/2][mgr.N2/2] /= 2;
+  }
+
+  // Same treatment for plane N0/2, if this rank holds it.
+  if (mgr.on_core(mgr.N0/2)) {
+    c[mgr.N0/2][0][0] /= 2;
+    c[mgr.N0/2][mgr.N1/2][0] /= 2;
+    c[mgr.N0/2][0][mgr.N2/2] /= 2;
+    c[mgr.N0/2][mgr.N1/2][mgr.N2/2] /= 2;
+  }
+}
+
+// Test driver: computes a finite-difference gradient of forward_chi2 with
+// respect to every low-resolution Fourier mode (gradient_ref), computes the
+// result of gradient_chi2 (gradient), and writes both, together with the input
+// fields, to one HDF5 file per rank.
+int main(int argc, char **argv)
+{
+    using boost::format;
+    using boost::str;
+    MPI_Communication *world = setupMPI(argc, argv);
+
+    typedef RandomNumberMPI<GSL_RandomNumber> RGen;
+
+    StaticInit::execute();    
+    Console& cons = Console::instance();
+    // One log file per rank.
+    cons.outputToFile(str(format("log_test_supersampling.txt.%d") % world->rank()));
+    cons.setVerboseLevel<LOG_DEBUG>();
+
+    // mgr: 16^3 (low resolution) grid; mgr2: 32^3 (high resolution) grid.
+    Manager mgr(16,16,16, world);
+    Manager mgr2(32,32,32, world);
+    
+    {
+      RGen rgen(world, -1);
+
+      rgen.seed(97249);
+
+      F_Array f_lo_array(mgr.extents_complex(), c_storage_order(), mgr.allocator_complex);
+      F_Array tmp_f_array(mgr.extents_complex(), c_storage_order(), mgr.allocator_complex);
+      F_Array gradient_ref(mgr.extents_complex(), c_storage_order(), mgr.allocator_complex);
+      F_Array gradient(mgr.extents_complex(), c_storage_order(), mgr.allocator_complex);
+      R_Array r_array(mgr.extents_real(), c_storage_order(), mgr.allocator_real);
+      R_Array tmp_array(mgr.extents_real(), c_storage_order(), mgr.allocator_real);
+      R_Array r_hi_array(mgr2.extents_real(), c_storage_order(), mgr2.allocator_real);
+      F_Array f_hi_array(mgr2.extents_complex(), c_storage_order(), mgr2.allocator_complex);
+      F_Array tmp_gradient(mgr2.extents_complex(), c_storage_order(), mgr2.allocator_complex);
+      F_Array mu(mgr2.extents_complex(), c_storage_order(), mgr2.allocator_complex);
+      Manager::plan_type plan_r2c = mgr.create_r2c_plan(r_array.data(), f_lo_array.data());
+      Manager::plan_type plan_r2c_hi = mgr2.create_r2c_plan(r_hi_array.data(), f_hi_array.data());
+      Manager::plan_type plan_c2r_hi = mgr2.create_c2r_plan(f_hi_array.data(), r_hi_array.data());
+
+      double fac = 1/double(r_array.num_elements());
+
+      // mu: transform of a random high-resolution real field, divided by the
+      // number of elements of that field.
+      copy_array(r_hi_array, b_fused<double, 3>( bind(rand_init<RGen>, boost::ref(rgen), 1) ) );
+      mgr2.execute_r2c(plan_r2c_hi, r_hi_array.data(), mu.data());
+      LibLSS::array::scaleArray3d(mu, 1.0/r_hi_array.num_elements());
+
+      // Generate random numbers
+      copy_array(r_array, b_fused<double, 3>( bind(rand_init<RGen>, boost::ref(rgen), fac) ) );          
+      // Save them
+      LibLSS::array::copyArray3d(tmp_array, r_array);
+      // The transform is run on the copy (tmp_array), not on r_array itself.
+      mgr.execute_r2c(plan_r2c, tmp_array.data(), f_lo_array.data());
+      LibLSS::array::scaleArray3d(f_lo_array, 1.0/r_array.num_elements());
+
+      LibLSS::array::fill(gradient_ref, 0);
+
+      // Reference chi2 at the unperturbed modes.
+      double chi2 = forward_chi2(world, mgr, mgr2, plan_c2r_hi, f_lo_array, mu);
+      // Every rank iterates over all modes because forward_chi2 performs a
+      // reduction on `world`; only the rank holding plane i modifies/stores
+      // entries of that plane.
+      for (long i = 0; i < mgr.N0; i++) {
+        for (long j = 0; j < mgr.N1; j++) {
+          for (long k = 0; k < mgr.N2_HC; k++) {
+            std::complex<double> delta(0,0);
+
+            cons.print<LOG_DEBUG>(format("doing %d,%d,%d") % i % j % k);
+            // Start again from the unperturbed modes.
+            LibLSS::array::copyArray3d(tmp_f_array, f_lo_array);
+            // Perturb the real part of mode (i,j,k) by epsilon.
+            if (mgr.on_core(i))
+              tmp_f_array[i][j][k] = f_lo_array[i][j][k] + std::complex<double>(epsilon,0);
+            // For k == 0 and k == N2/2 the mirrored entry
+            // [(N0-i)%N0][(N1-j)%N1][k] is overwritten with the conjugate of
+            // the perturbed value.
+            if (k==mgr.N2/2 || k == 0) {
+              long plane = (mgr.N0-i)%mgr.N0;
+              F_Array::element value = 0;
+              
+              if (mgr.on_core(plane)) {
+                // Receive the perturbed value (tag i) when plane i is held by
+                // another rank; otherwise read it locally.
+                if (world->size() > 1 && !mgr.on_core(i)) 
+                  world->recv(&value, 1, translateMPIType<F_Array::element>(), mgr.get_peer(i), i);
+                else
+                  value = tmp_f_array[i][j][k];
+        
+                tmp_f_array[plane][(mgr.N1-j)%mgr.N1][k] = std::conj(value);
+              } else if (mgr.on_core(i)) {
+                // Matching send towards the rank holding the mirrored plane.
+                world->send(&tmp_f_array[i][j][k], 1, translateMPIType<F_Array::element>(), mgr.get_peer(plane), i);
+              }
+            }
+           
+            // Forward finite difference for the real-part perturbation.
+            delta.real((forward_chi2(world, mgr, mgr2, plan_c2r_hi, tmp_f_array, mu) - chi2)/epsilon);
+
+            // Now perturb the imaginary part of mode (i,j,k) by epsilon.
+            if (mgr.on_core(i))
+              tmp_f_array[i][j][k] = f_lo_array[i][j][k] + std::complex<double>(0,epsilon);
+            if (k==mgr.N2/2 || k == 0) {
+              long plane = (mgr.N0-i)%mgr.N0;
+              F_Array::element value = 0;
+
+              // A mode that is its own mirror gets its imaginary part reset
+              // to 0 before the conjugate is written back.
+              if (mgr.on_core(i) && plane == i && (mgr.N1-j)%mgr.N1 == j) {
+                tmp_f_array[i][j][k].imag(0);
+              }
+              if (mgr.on_core(plane)) {
+                  if (world->size() > 1 && !mgr.on_core(i)) 
+                    world->recv(&value, 1, translateMPIType<F_Array::element>(), mgr.get_peer(i), i);
+                  else
+                    value = tmp_f_array[i][j][k];
+                  tmp_f_array[plane][(mgr.N1-j)%mgr.N1][k] = std::conj(value);
+              } else if (mgr.on_core(i)) {
+                  world->send(&tmp_f_array[i][j][k], 1, translateMPIType<F_Array::element>(), mgr.get_peer(plane), i);
+              }
+            }
+
+            // Forward finite difference for the imaginary-part perturbation.
+            delta.imag((forward_chi2(world, mgr, mgr2, plan_c2r_hi, tmp_f_array, mu) - chi2)/epsilon);
+            if (mgr.on_core(i))
+                gradient_ref[i][j][k] = delta;
+          }
+        }
+      }
+      world->barrier();
+
+      // Gradient from gradient_chi2, to be compared offline with gradient_ref.
+      LibLSS::array::fill(gradient, 0);
+      gradient_chi2(mgr, mgr2, plan_c2r_hi, f_lo_array, mu, gradient, tmp_gradient);
+
+      // Now we have our modes
+      {
+        // One output file per rank.
+        string s = boost::str(boost::format("test_grad_degrade.h5_%d") % world->rank());
+        H5::H5File f(s, H5F_ACC_TRUNC);
+        hdf5_write_array(f, "gradient_ref", gradient_ref);
+        hdf5_write_array(f, "gradient", gradient);
+        hdf5_write_array(f, "gradient_hi", tmp_gradient);
+        hdf5_write_array(f, "mu", mu);
+        hdf5_write_array(f, "lo", f_lo_array);
+        
+        mgr2.upgrade_complex(mgr, f_lo_array, f_hi_array);
+        hdf5_write_array(f, "hi", f_hi_array);
+
+      }
+    }
+    
+    world->barrier();
+    StaticInit::finalize();
+    return 0;
+}
--- a/libLSS/tests/test_has_member.cpp
+++ b/libLSS/tests/test_has_member.cpp
@ -0,0 +1,59 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_has_member.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <typeinfo>
+#include <boost/utility/enable_if.hpp>
+#include <iostream>
+#include "libLSS/tools/checkmem.hpp"
+
+using namespace std;
+
+HAS_MEM_FUNC(checkMember, has_check_member);
+
+// Sample type that does not declare checkMember(); it only carries an int.
+struct NoStruct {
+   int a;
+};
+
+// Sample type that declares `void checkMember()` next to a double field.
+struct YesStruct {
+   double c;
+
+   // Writes "Cool" followed by std::endl to standard output.
+   void checkMember()
+   {
+      std::cout << "Cool" << std::endl;
+   }
+};
+
+// Overload enabled when has_check_member<T, void (T::*)()> is true:
+// reports the type name, then calls checkMember() on a default-constructed T.
+template<typename T>
+typename boost::enable_if<has_check_member<T, void (T::*)()> >::type
+exec_fun()
+{
+    const char *type_name = typeid(T).name();
+    std::cout << type_name << " has the member" << std::endl;
+
+    T instance;
+    instance.checkMember();
+}
+
+// Overload enabled when has_check_member<T, void (T::*)()> is false:
+// only reports that the member is absent.
+template<typename T>
+typename boost::disable_if<has_check_member<T, void (T::*)()> >::type
+exec_fun()
+{
+    const char *type_name = typeid(T).name();
+    std::cout << type_name << " does not have the member" << std::endl;
+}
+
+// Prints the value of the has_check_member trait for both sample types, then
+// lets overload resolution pick the matching exec_fun for each of them.
+int main()
+{
+    typedef has_check_member<NoStruct, void (NoStruct::*)()> no_probe;
+    typedef has_check_member<YesStruct, void (YesStruct::*)()> yes_probe;
+
+    cout << "has_check_member<NoStruct>::value = " << no_probe::value << endl;
+
+    cout << "has_check_member<YesStruct>::value = " << yes_probe::value << endl;
+
+    exec_fun<NoStruct>();
+    exec_fun<YesStruct>();
+
+    return 0;
+}
--- a/libLSS/tests/test_hdf5_buffered.cpp
+++ b/libLSS/tests/test_hdf5_buffered.cpp
@ -0,0 +1,33 @@
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/hdf5_error.hpp"
+#include <H5Cpp.h>
+#include "libLSS/tools/hdf5_buffered_write.hpp"
+#include "libLSS/tools/fusewrapper.hpp"
+
+using namespace LibLSS;
+
+/**
+ * Fills a 1000x2x3 array with q + 2*s (q, s being the first and last index),
+ * writes it to test.h5 once through hdf5_write_buffered_array (dataset "test",
+ * with a callback logging each reported value) and once through
+ * CosmoTool::hdf5_write_array (dataset "test2").
+ */
+int main(int argc, char **argv)
+{
+  LibLSS::MPI_Communication *mpi_world = LibLSS::setupMPI(argc, argv);
+  StaticInit::execute();
+
+  H5::H5File f("test.h5", H5F_ACC_TRUNC);
+  auto& cons = Console::instance();
+
+  boost::multi_array<double, 3> a(boost::extents[1000][2][3]);
+
+  // The middle index r does not enter the fill value.
+  fwrap(a) = fwrap(b_fused_idx<double,3>([](int q, int r, int s) { return q+2*s; }));
+
+  // The label names the element that is actually printed.
+  cons.format<LOG_VERBOSE>("a[5][0][0] = %g", a[5][0][0]);
+
+  // NOTE(review): the meaning of the two boolean flags is defined in
+  // hdf5_buffered_write.hpp -- confirm there.
+  hdf5_write_buffered_array(f, "test", a, true, true, [&](size_t p) {
+    cons.format<LOG_STD>("Wrote %d", p);
+  });
+  CosmoTool::hdf5_write_array(f, "test2", a);
+
+  StaticInit::finalize();
+
+  return 0;
+}
--- a/libLSS/tests/test_los_projector.cpp
+++ b/libLSS/tests/test_los_projector.cpp
@ -0,0 +1,45 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_los_projector.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <iostream>
+#include "libLSS/physics/projector.hpp"
+
+using namespace LibLSS;
+
+// Prints `label` followed by the three values separated by commas and a
+// terminating std::endl.
+static void print_vec3(const char *label, double x, double y, double z)
+{
+	std::cout << label << x << "," << y << "," << z << std::endl;
+}
+
+/**
+ * Calls ray_tracer once for an observer at the origin looking along the
+ * (1,1,0)/sqrt(2) direction, then prints the geometry and the first ten
+ * (voxel_id, dlos) entries of the resulting LOSContainer.
+ */
+int main()
+{
+	// number of pixels
+	double N[3] = {10,10,10};
+	// size pixels
+	double dl[3]={2,2,2};
+	//lower left corner
+	double min[3]={-1,-1,-1};
+	//observer position
+	double origin[3]={0,0,0};
+	// shooting direction (normalized)
+	const double inv_sqrt2 = 1./sqrt(2.);
+	double pointing[3]={inv_sqrt2,inv_sqrt2,0};
+	// Alternative directions kept for manual experiments:
+	//double pointing[3]={1./2.,sqrt(3.)/2.,0};
+	//double pointing[3]={1.,0.,0.};
+	//double pointing[3]={1./sqrt(3.),1./sqrt(3.),1./sqrt(3.)};
+
+	LOSContainer los;
+	ray_tracer(origin, pointing, min, dl, N, los);
+
+	print_vec3("L:", N[0]*dl[0], N[1]*dl[1], N[2]*dl[2]);
+	print_vec3("corner:", min[0], min[1], min[2]);
+	print_vec3("origin:", origin[0], origin[1], origin[2]);
+	print_vec3("direction:", pointing[0], pointing[1], pointing[2]);
+	std::cout << "voxel_id, los:" << std::endl;
+
+	// Only the first ten entries are printed.
+	int step = 0;
+	while (step < 10) {
+		std::cout << los.voxel_id[step][0] << los.voxel_id[step][1] << los.voxel_id[step][2] << " , " << los.dlos[step] << std::endl;
+		++step;
+	}
+
+	return 0;
+}
--- a/libLSS/tests/test_messenger.cpp
+++ b/libLSS/tests/test_messenger.cpp
@ -0,0 +1,90 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_messenger.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include "libLSS/samplers/ares/gibbs_messenger.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/hdf5_error.hpp"
+
+using namespace LibLSS;
+
+typedef GSL_RandomNumber RGenType;
+
+/**
+ * Runs a single MessengerSampler step on a 32^3 grid whose data field and
+ * s field are both a unit spike at [16][16][16], with a zero messenger mask
+ * and messenger_tau = 0, then saves the Markov state to dump.h5 and
+ * dump_snap.h5.
+ */
+int main(int argc, char **argv)
+{
+    MPI_Communication *comm = setupMPI(argc, argv); 
+    StaticInit::execute();
+    Console::instance().setVerboseLevel<LOG_DEBUG>();
+    MarkovState state;
+    SLong *N0, *N1, *N2;
+    SDouble *L0, *L1, *L2;
+    RGenType randgen;
+
+    state.newElement("random_generator", new RandomStateElement<RandomNumber>(&randgen));
+
+    state.newElement("N0", N0 = new SLong());
+    state.newElement("N1", N1 = new SLong());
+    state.newElement("N2", N2 = new SLong());
+
+    state.newElement("L0", L0 = new SDouble());
+    state.newElement("L1", L1 = new SDouble());
+    state.newElement("L2", L2 = new SDouble());
+
+    state.newElement("s_field", new ArrayType(boost::extents[32][32][32]), true);
+
+    N0->value = 32;
+    N1->value = 32;
+    N2->value = 32;
+
+    // The whole grid is declared local to this process (startN0 = 0).
+    state.newSyScalar<long>("localN0", N0->value);
+    state.newSyScalar<long>("startN0", 0);
+    state.newSyScalar<long>("NUM_MODES", 100);
+    
+    MessengerSampler s(comm);
+    
+    // Initialize (data,s)->t sampler
+    s.init_markov(state);
+    
+    // Build some mock field
+    ArrayType *field = state.get<ArrayType>("data_field");
+    
+    field->eigen().fill(0);
+    (*field->array)[16][16][16] = 1;
+
+    // Build some s field
+    ArrayType *s_field = state.get<ArrayType>("s_field");
+    
+    s_field->eigen().fill(0);
+    (*s_field->array)[16][16][16] = 1;
+
+    
+    // Setup messenger parameters
+    ArrayType *mmask = state.get<ArrayType>("messenger_mask");
+    mmask->eigen().fill(0);
+    
+    state.get<SDouble>("messenger_tau")->value = 0.0;
+    
+    
+    s.sample(state);
+    
+    {
+        H5::H5File f("dump.h5", H5F_ACC_TRUNC);
+        state.saveState(f);
+        auto f2 = std::make_shared<H5::H5File>("dump_snap.h5", H5F_ACC_TRUNC);
+        state.mpiSaveState(f2, comm, true /* We do not do reassembly but there is only one node */, true);
+    }
+    
+    
+    StaticInit::finalize();
+
+    // Shut the MPI layer down, as the sibling messenger tests do.
+    doneMPI();
+    
+    return 0;
+}
--- a/libLSS/tests/test_messenger2.cpp
+++ b/libLSS/tests/test_messenger2.cpp
@ -0,0 +1,113 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_messenger2.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include "libLSS/samplers/ares/gibbs_messenger.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+
+using namespace LibLSS;
+
+typedef GSL_RandomNumber RGenType;
+
+
+/**
+ * Runs one MessengerSampler step followed by one MessengerSignalSampler step
+ * on a 32^3 grid (box side 200) with a power spectrum array filled with 1e-5
+ * and a data field made of a unit spike at [16][16][16], then saves the Markov
+ * state to dump.h5.
+ */
+int main(int argc, char **argv)
+{
+    StaticInit::execute();
+    MPI_Communication *mpi_world = setupMPI(argc, argv);
+    Console::instance().setVerboseLevel<LOG_DEBUG>();
+
+    MarkovState state;
+    SLong *N0, *N1, *N2;
+    SDouble *L0, *L1, *L2;
+    RGenType randgen;
+    ArrayType1d *ps;
+    IArrayType *k_keys;
+
+    // --- Register the state elements the samplers expect to find ---
+    state.newElement("random_generator", new RandomStateElement<RandomNumber>(&randgen));
+
+    state.newElement("N0", N0 = new SLong());
+    state.newElement("N1", N1 = new SLong());
+    state.newElement("N2", N2 = new SLong());
+
+    state.newElement("L0", L0 = new SDouble());
+    state.newElement("L1", L1 = new SDouble());
+    state.newElement("L2", L2 = new SDouble());
+
+    state.newSyScalar<bool>("messenger_signal_blocked", false);
+
+    state.newSyScalar<long>("NUM_MODES", 100);
+
+    // --- Power spectrum bins and the mode -> bin lookup table ---
+    double dk = 2*M_PI/200. * 16 * 2 /100.;
+    boost::array<int, 3> N = {{32, 32, 32}};
+    boost::array<double, 3> L = {{200., 200., 200.}};
+    state.newElement("powerspectrum", ps = new ArrayType1d(boost::extents[100]), true);
+    state.newElement("k_keys", k_keys = new IArrayType(boost::extents[32][32][17]));
+
+    for (int ix = 0; ix < 32; ++ix)
+        for (int iy = 0; iy < 32; ++iy)
+            for (int iz = 0; iz < 17; ++iz) {
+                boost::array<int, 3> ik = {{ix, iy, iz}};
+
+                (*k_keys->array)[ix][iy][iz] = power_key(N, ik, L, 0, dk, 100);
+            }
+
+    ps->eigen().fill(0.00001);
+
+    // --- Grid and box sizes ---
+    N0->value = 32;
+    N1->value = 32;
+    N2->value = 32;
+
+    L0->value = 200;
+    L1->value = 200;
+    L2->value = 200;
+
+    MessengerSampler s(mpi_world);
+    MessengerSignalSampler s2(mpi_world);
+
+    // Initialize (data,s)->t sampler
+    s.init_markov(state);
+    s2.init_markov(state);
+
+    // Build some mock field
+    ArrayType *field = state.get<ArrayType>("data_field");
+
+    field->eigen().fill(0);
+    (*field->array)[16][16][16] = 1;
+
+    // Setup messenger parameters
+    ArrayType *mmask = state.get<ArrayType>("messenger_mask");
+    mmask->eigen().fill(0);
+
+    state.get<SDouble>("messenger_tau")->value = 1;
+
+    // One pass of each sampler.
+    s.sample(state);
+    s2.sample(state);
+
+    {
+        H5::H5File f("dump.h5", H5F_ACC_TRUNC);
+        state.saveState(f);
+    }
+
+    StaticInit::finalize();
+
+    doneMPI();
+
+    return 0;
+}
--- a/libLSS/tests/test_messenger3.cpp
+++ b/libLSS/tests/test_messenger3.cpp
@ -0,0 +1,135 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_messenger3.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/mcmc/global_state.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include "libLSS/samplers/core/types_samplers.hpp"
+#include "libLSS/samplers/ares/gibbs_messenger.hpp"
+#include "libLSS/samplers/core/powerspec_tools.hpp"
+#include "libLSS/samplers/ares/powerspectrum_a_sampler.hpp"
+
+using namespace LibLSS;
+
+typedef GSL_RandomNumber RGenType;
+
+
+/**
+ * Mock-data test chaining MessengerSampler, MessengerSignalSampler and
+ * PowerSpectrumSampler_a on a 64^3 grid (box side 200): a first
+ * MessengerSignalSampler pass runs in mock-generation mode, the resulting
+ * s_field is copied into data_field, three sampling rounds follow and the
+ * Markov state is saved to dump.h5.
+ */
+int main(int argc, char **argv)
+{
+    StaticInit::execute();
+    MPI_Communication *mpi_world = setupMPI(argc, argv);
+    Console::instance().setVerboseLevel<LOG_DEBUG>();
+    MarkovState state;
+    SLong *N0, *N1, *N2, *N2_HC, *NUM_MODES, *localN0, *startN0, *fourierLocalSize;
+    SDouble *L0, *L1, *L2, *K_MIN, *K_MAX;
+    RGenType randgen;
+    ArrayType1d *ps;
+    IArrayType *k_keys;
+
+    // --- Register the state elements ---
+    state.newElement("random_generator", new RandomGen(&randgen));
+
+    state.newElement("fourierLocalSize", fourierLocalSize = new SLong());
+    state.newElement("localN0", localN0 = new SLong());
+    state.newElement("startN0", startN0 = new SLong());
+    state.newElement("N0", N0 = new SLong());
+    state.newElement("N1", N1 = new SLong());
+    state.newElement("N2", N2 = new SLong());
+    state.newElement("N2_HC", N2_HC = new SLong());
+
+    state.newSyScalar("messenger_signal_blocked", false);
+    state.newSyScalar("power_sampler_a_blocked", false);
+    state.newSyScalar("power_sampler_b_blocked", false);
+
+    state.newElement("NUM_MODES", NUM_MODES = new SLong());
+    state.newElement("K_MIN", K_MIN = new SDouble());
+    state.newElement("K_MAX", K_MAX = new SDouble());
+
+    NUM_MODES->value = 100;
+    K_MIN->value = 0;
+    K_MAX->value = 2.;
+
+    state.newElement("L0", L0 = new SDouble());
+    state.newElement("L1", L1 = new SDouble());
+    state.newElement("L2", L2 = new SDouble());
+
+    // --- Grid description: the full 64^3 grid is local, 33 = 64/2 + 1 ---
+    localN0->value = 64;
+    startN0->value = 0;
+    N0->value = 64;
+    N1->value = 64;
+    N2->value = 64;
+    N2_HC->value = 33;
+    fourierLocalSize->value = 64*64*33;
+
+    L0->value = 200;
+    L1->value = 200;
+    L2->value = 200;
+
+    MessengerSampler s(mpi_world);
+    MessengerSignalSampler s2(mpi_world);
+    PowerSpectrumSampler_a p(mpi_world);
+
+    // Initialize (data,s)->t sampler
+    s.init_markov(state);
+    s2.init_markov(state);
+    p.init_markov(state);
+
+    ArrayType1d::ArrayType& k_val = *state.get<ArrayType1d>("k_modes")->array;
+    int Nk  = NUM_MODES->value;
+
+    s2.setMockGeneration(true);
+
+    // Fill up powerspectrum with k^-2; bin 0 is left untouched.
+    ps = state.get<ArrayType1d>("powerspectrum");
+    for (int bin = 1; bin < Nk; ++bin)
+        (*ps->array)[bin] = pow(k_val[bin], -2);
+
+    // Build some mock field
+    ArrayType *field = state.get<ArrayType>("data_field");
+
+    field->eigen().fill(0);
+
+    // Setup messenger parameters: mask is -1 everywhere except one voxel.
+    ArrayType *mmask = state.get<ArrayType>("messenger_mask");
+    mmask->eigen().fill(-1);
+
+    (*mmask->array)[16][16][16] = 0;
+
+    state.get<SDouble>("messenger_tau")->value = 1.; // Remove any sign of data. I should add a mechanism to generate unconstrained realizations
+
+    // First round is unconstrained
+    s2.sample(state);
+    s2.setMockGeneration(false);
+    field->eigen() = state.get<ArrayType>("s_field")->eigen();
+
+    // Three rounds of messenger / signal / power spectrum sampling.
+    for (int round = 0; round < 3; ++round) {
+        s.sample(state);
+        s2.sample(state);
+        p.sample(state);
+    }
+
+    {
+        H5::H5File f("dump.h5", H5F_ACC_TRUNC);
+        state.saveState(f);
+    }
+
+    StaticInit::finalize();
+
+    doneMPI();
+
+    return 0;
+}
--- a/libLSS/tests/test_mngp.cpp
+++ b/libLSS/tests/test_mngp.cpp
@ -0,0 +1,151 @@
+/*+
+    ARES/HADES/BORG Package -- ./libLSS/tests/test_mngp.cpp
+    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
+    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
+
+    Additional contributions from:
+       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+    
+*/
+#include <cmath>
+#include <CosmoTool/algo.hpp>
+#include <boost/multi_array.hpp>
+#include "libLSS/mpi/generic_mpi.hpp"
+#include "libLSS/tools/console.hpp"
+#include "libLSS/tools/static_init.hpp"
+#include "libLSS/tools/uninitialized_type.hpp"
+#include "libLSS/tools/array_tools.hpp"
+#include "libLSS/physics/classic_cic.hpp"
+//#include "libLSS/tools/mpi_fftw_helper.hpp"
+#include "libLSS/physics/modified_ngp.hpp"
+#include "libLSS/physics/modified_ngp_smooth.hpp"
+#include "libLSS/physics/cosmo.hpp"
+#include <H5Cpp.h>
+#include <CosmoTool/hdf5_array.hpp>
+#include "libLSS/tools/hdf5_error.hpp"
+#include "libLSS/samplers/rgen/gsl_random_number.hpp"
+#include <boost/chrono.hpp>
+
+//#undef RANDOM_ACCESS
+#define RANDOM_ACCESS
+
+using namespace LibLSS;
+using CosmoTool::cube;
+
+typedef ClassicCloudInCell<double> CIC;
+//typedef ModifiedNGP<double> MNGP;
+typedef ModifiedNGP<double, NGPGrid::Quad> MNGP;
+typedef SmoothModifiedNGP<double, SmoothNGPGrid::Quad> MNGPS;
+//typedef ModifiedNGP<double, NGPGrid::CIC> MNGP;
+
+/**
+ * Projects Np = 128^3 particles onto a 64^3 grid with three kernels
+ * (ClassicCloudInCell, ModifiedNGP and SmoothModifiedNGP), times the three
+ * projections, evaluates the matching adjoints and writes all densities and
+ * adjoints to cic.h5.
+ *
+ * @return 0 on success, 1 when the HDF5 output file could not be written.
+ */
+int main(int argc, char **argv) {
+  StaticInit::execute();
+  MPI_Communication *world = setupMPI(argc, argv);
+  CosmologicalParameters cosmo;
+  cosmo.omega_m = 0.30;
+  cosmo.omega_b = 0.045;
+  cosmo.omega_q = 0.70;
+  cosmo.w = -1;
+  cosmo.n_s = 0.97;
+  cosmo.sigma8 = 0.8;
+  cosmo.h = 0.68;
+  cosmo.a0 = 1.0;
+
+  Console::instance().setVerboseLevel<LOG_DEBUG>();
+
+  double L = 1.0;
+  int N = 64;
+  int Np_g = 128;
+  int Np = cube(Np_g);
+  typedef UninitializedArray<boost::multi_array<double, 3>> U_Density;
+  typedef UninitializedArray<boost::multi_array<double, 2>> U_Particles;
+  U_Density density_p(boost::extents[N][N][N]);
+  U_Density density_mngp_p(boost::extents[N][N][N]);
+  U_Density density_mngps_p(boost::extents[N][N][N]);
+  U_Particles particles_p(boost::extents[Np][3]);
+  U_Particles adjoint_p(boost::extents[Np][3]);
+  U_Particles adjoint_mngp_p(boost::extents[Np][3]);
+  U_Particles adjoint_mngps_p(boost::extents[Np][3]);
+  U_Density::array_type &density = density_p.get_array();
+  U_Density::array_type &density_mngp = density_mngp_p.get_array();
+  U_Density::array_type &density_mngps = density_mngps_p.get_array();
+  U_Particles::array_type &particles = particles_p.get_array();
+  U_Particles::array_type &adjoint = adjoint_p.get_array();
+  U_Particles::array_type &adjoint_mngp = adjoint_mngp_p.get_array();
+  U_Particles::array_type &adjoint_mngps = adjoint_mngps_p.get_array();
+  CIC cic;
+  MNGP mngp;
+
+#ifdef RANDOM_ACCESS
+  // Particle positions drawn uniformly in [0, L) on each axis.
+  RandomNumberThreaded<GSL_RandomNumber> rgen(-1);
+
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    particles[i][0] = L * rgen.uniform();
+    particles[i][1] = L * rgen.uniform();
+    particles[i][2] = L * rgen.uniform();
+  }
+#else
+
+  // Particles placed on a regular Np_g^3 lattice.
+#  pragma omp parallel for schedule(static)
+  for (long i = 0; i < Np; i++) {
+    int iz = (i % Np_g);
+    int iy = ((i / Np_g) % Np_g);
+    int ix = ((i / Np_g / Np_g));
+    particles[i][0] = L / Np_g * ix;
+    particles[i][1] = L / Np_g * iy;
+    particles[i][2] = L / Np_g * iz;
+  }
+
+#endif
+  Console::instance().print<LOG_INFO>("Clearing and projecting");
+  array::fill(density, 0);
+  array::fill(density_mngp, 0);
+  array::fill(density_mngps, 0);
+
+  using namespace boost::chrono;
+  system_clock::time_point start_classic, end_classic, start_mp, end_mp,
+      start_mp2, end_mp2;
+
+  // Only the projections are timed; the adjoints run outside the timers.
+  start_classic = system_clock::now();
+  CIC::projection(particles, density, L, L, L, N, N, N);
+  end_classic = system_clock::now();
+
+  CIC::adjoint(particles, density, adjoint, L, L, L, N, N, N, 1.0);
+
+  start_mp = system_clock::now();
+  MNGP::projection(particles, density_mngp, L, L, L, N, N, N);
+  end_mp = system_clock::now();
+
+  start_mp2 = system_clock::now();
+  MNGPS::projection(particles, density_mngps, L, L, L, N, N, N);
+  end_mp2 = system_clock::now();
+
+  MNGP::adjoint(particles, density_mngp, adjoint_mngp, L, L, L, N, N, N, 1.0);
+  MNGPS::adjoint(
+      particles, density_mngps, adjoint_mngps, L, L, L, N, N, N, 1.0);
+
+  duration<double> elapsed_classic = end_classic - start_classic;
+  duration<double> elapsed_mp = end_mp - start_mp;
+  duration<double> elapsed_mps = end_mp2 - start_mp2;
+
+  std::cout << "MNGP: " << elapsed_mp << " MNPS:" << elapsed_mps
+            << "  Classic: " << elapsed_classic << std::endl;
+
+  int status = 0;
+  try {
+    H5::H5File f("cic.h5", H5F_ACC_TRUNC);
+    CosmoTool::hdf5_write_array(f, "density", density);
+    CosmoTool::hdf5_write_array(f, "density_mngp", density_mngp);
+    CosmoTool::hdf5_write_array(f, "density_mngps", density_mngps);
+    CosmoTool::hdf5_write_array(f, "adjoint", adjoint);
+    CosmoTool::hdf5_write_array(f, "adjoint_mngp", adjoint_mngp);
+    CosmoTool::hdf5_write_array(f, "adjoint_mngps", adjoint_mngps);
+  } catch (const H5::FileIException &) {
+    // The file handled here is the output file cic.h5, so say so.
+    Console::instance().print<LOG_ERROR>(
+        "Failed to create or write cic.h5 in the current directory.");
+    status = 1;
+  }
+
+  // Shut down on both the success and the failure path, as the sibling
+  // tests do.
+  StaticInit::finalize();
+  doneMPI();
+
+  return status;
+}
--- a/Show more
+++ b/Show more