// openmp_cic.hpp -- C++ source, 329 lines, 10 KiB
/*+
    ARES/HADES/BORG Package -- ./libLSS/physics/openmp_cic.hpp
    Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
    Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>

    Additional contributions from:
       Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)

+*/
|
|
#ifndef __LIBLSS_PHYSICS_OPENMP_CIC_HPP
|
|
#define __LIBLSS_PHYSICS_OPENMP_CIC_HPP
|
|
|
|
#include <cmath>
#include <cstddef>
#include <type_traits>
#include "libLSS/tools/console.hpp"
#include <boost/multi_array.hpp>
#include <CosmoTool/omptl/omptl>
#include <CosmoTool/omptl/omptl_algorithm>
#include <iostream>
#include "libLSS/tools/array_tools.hpp"
#include "libLSS/physics/generic_cic.hpp"
|
|
|
|
namespace LibLSS {
|
|
|
|
template <typename T>
|
|
struct OpenMPCloudInCell_impl {
|
|
typedef T Type;
|
|
// Number of extra planes required in case of MPI
|
|
static const int MPI_PLANE_LEAKAGE = 1;
|
|
typedef boost::multi_array<int, 1> ListArray;
|
|
typedef boost::multi_array<int, 1> AtomicListArray;
|
|
|
|
template <
|
|
typename ParticleArray, typename ProjectionDensityArray,
|
|
typename WeightArray, typename PeriodicFunction>
|
|
static void projection(
|
|
const ParticleArray &particles, ProjectionDensityArray &density, T Lx,
|
|
T Ly, T Lz, size_t N0, size_t N1, size_t N2, const PeriodicFunction &p,
|
|
const WeightArray &weight, size_t Np) {
|
|
using boost::extents;
|
|
|
|
ConsoleContext<LOG_DEBUG> ctx("OpenMP CIC projection");
|
|
|
|
T inv_dx = N0 / Lx;
|
|
T inv_dy = N1 / Ly;
|
|
T inv_dz = N2 / Lz;
|
|
|
|
typedef UninitializedArray<AtomicListArray> U_AtomicListArray;
|
|
typedef UninitializedArray<ListArray> U_ListArray;
|
|
U_AtomicListArray part_mesh_p(extents[long(N0) * long(N1) * long(N2)]);
|
|
U_ListArray part_list_p(extents[Np]);
|
|
U_AtomicListArray::array_type &part_mesh = part_mesh_p.get_array();
|
|
U_ListArray::array_type &part_list = part_list_p.get_array();
|
|
long Nmesh = part_mesh.num_elements();
|
|
|
|
{
|
|
ConsoleContext<LOG_DEBUG> ctx0("initialize arrays");
|
|
array::fill(part_mesh, -1);
|
|
array::fill(part_list, -1);
|
|
}
|
|
|
|
{
|
|
ConsoleContext<LOG_DEBUG> ctx0("build mesh list");
|
|
// First build part -> mesh list
|
|
#pragma omp parallel for schedule(static)
|
|
for (size_t i_part = 0; i_part < Np; i_part++) {
|
|
|
|
T x = particles[i_part][0] * inv_dx;
|
|
T y = particles[i_part][1] * inv_dy;
|
|
T z = particles[i_part][2] * inv_dz;
|
|
|
|
size_t ix = (size_t)std::floor(x);
|
|
size_t iy = (size_t)std::floor(y);
|
|
size_t iz = (size_t)std::floor(z);
|
|
|
|
size_t idx = iz + N2 * iy + N2 * N1 * ix;
|
|
|
|
int initial_elt =
|
|
__atomic_exchange_n(&part_mesh[idx], i_part, __ATOMIC_RELAXED);
|
|
if (initial_elt != -1) {
|
|
part_list[i_part] = initial_elt;
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
ConsoleContext<LOG_DEBUG> ctx0("reverse list");
|
|
|
|
// We built the list in the incorrect order, reverse it as fast as we can
|
|
#pragma omp parallel for schedule(dynamic, 10000)
|
|
for (size_t mid = 0; mid < Nmesh; mid++) {
|
|
int current_part = part_mesh[mid];
|
|
|
|
if (current_part >= 0) {
|
|
int next_part = part_list[current_part];
|
|
|
|
part_list[current_part] = -1;
|
|
while (next_part != -1) {
|
|
int p = part_list[next_part];
|
|
part_list[next_part] = current_part;
|
|
current_part = next_part;
|
|
next_part = p;
|
|
}
|
|
part_mesh[mid] = current_part;
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
ConsoleContext<LOG_DEBUG> ctx0("projection");
|
|
|
|
#pragma omp parallel
|
|
{
|
|
|
|
for (int looper0 = 0; looper0 < 2; looper0++) {
|
|
for (int looper1 = 0; looper1 < 2; looper1++) {
|
|
for (int looper2 = 0; looper2 < 2; looper2++) {
|
|
|
|
int r[3] = {looper0, looper1, looper2};
|
|
|
|
#pragma omp barrier
|
|
#pragma omp for schedule(dynamic, 10000)
|
|
for (long mid = 0; mid < Nmesh; mid++) {
|
|
int mz = mid % N2;
|
|
int my = (mid / N2) % N1;
|
|
int mx = (mid / (N2 * N1));
|
|
int i_part = part_mesh[mid];
|
|
|
|
T w = 0;
|
|
|
|
while (i_part != -1) {
|
|
T w0 = 1;
|
|
T x = particles[i_part][0] * inv_dx;
|
|
T y = particles[i_part][1] * inv_dy;
|
|
T z = particles[i_part][2] * inv_dz;
|
|
T qx = std::floor(x);
|
|
T qy = std::floor(y);
|
|
T qz = std::floor(z);
|
|
T dx = x - qx;
|
|
T dy = y - qy;
|
|
T dz = z - qz;
|
|
w0 = (r[0] == 1) ? dx : (T(1) - dx);
|
|
w0 *= (r[1] == 1) ? dy : (T(1) - dy);
|
|
w0 *= (r[2] == 1) ? dz : (T(1) - dz);
|
|
w += w0 * weight[i_part];
|
|
i_part = part_list[i_part];
|
|
}
|
|
|
|
size_t tx = (mx + looper0);
|
|
size_t ty = (my + looper1);
|
|
size_t tz = (mz + looper2);
|
|
p(tx, ty, tz);
|
|
density[tx][ty][tz] += w;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#pragma omp barrier
|
|
}
|
|
}
|
|
|
|
template <
|
|
typename GradientArray, typename ProjectionDensityArray,
|
|
typename WeightArray>
|
|
static inline
|
|
typename std::enable_if<WeightArray::dimensionality == 1>::type
|
|
__do_gradient(
|
|
GradientArray &adj_gradient, const ProjectionDensityArray &density,
|
|
WeightArray const &a_w, size_t i, int axis, size_t ix, size_t iy,
|
|
size_t iz, size_t jx, size_t jy, size_t jz, T x, T y, T z,
|
|
T global_w) {
|
|
T rx, ry, rz;
|
|
T qx, qy, qz;
|
|
|
|
switch (axis) {
|
|
case 0:
|
|
rx = 1;
|
|
qx = -1;
|
|
ry = y - iy;
|
|
qy = 1 - ry;
|
|
rz = z - iz;
|
|
qz = 1 - rz;
|
|
break;
|
|
case 1:
|
|
rx = x - ix;
|
|
qx = 1 - rx;
|
|
ry = 1;
|
|
qy = -1;
|
|
rz = z - iz;
|
|
qz = 1 - rz;
|
|
break;
|
|
case 2:
|
|
rx = x - ix;
|
|
qx = 1 - rx;
|
|
ry = y - iy;
|
|
qy = 1 - ry;
|
|
rz = 1;
|
|
qz = -1;
|
|
break;
|
|
}
|
|
|
|
double w = density[ix][iy][iz] * qx * qy * qz +
|
|
density[ix][iy][jz] * qx * qy * rz +
|
|
density[ix][jy][iz] * qx * ry * qz +
|
|
density[ix][jy][jz] * qx * ry * rz +
|
|
density[jx][iy][iz] * rx * qy * qz +
|
|
density[jx][iy][jz] * rx * qy * rz +
|
|
density[jx][jy][iz] * rx * ry * qz +
|
|
density[jx][jy][jz] * rx * ry * rz;
|
|
|
|
adj_gradient[i][axis] = a_w[axis] * w * global_w;
|
|
}
|
|
|
|
template <typename GradientArray, typename ProjectionDensityArray>
|
|
static inline void __do_gradient(
|
|
GradientArray &adj_gradient, const ProjectionDensityArray &density,
|
|
T a_w, size_t i, int axis, size_t ix, size_t iy, size_t iz, size_t jx,
|
|
size_t jy, size_t jz, T x, T y, T z, T global_w) {
|
|
T rx, ry, rz;
|
|
T qx, qy, qz;
|
|
|
|
switch (axis) {
|
|
case 0:
|
|
rx = 1;
|
|
qx = -1;
|
|
ry = y - iy;
|
|
qy = 1 - ry;
|
|
rz = z - iz;
|
|
qz = 1 - rz;
|
|
break;
|
|
case 1:
|
|
rx = x - ix;
|
|
qx = 1 - rx;
|
|
ry = 1;
|
|
qy = -1;
|
|
rz = z - iz;
|
|
qz = 1 - rz;
|
|
break;
|
|
case 2:
|
|
rx = x - ix;
|
|
qx = 1 - rx;
|
|
ry = y - iy;
|
|
qy = 1 - ry;
|
|
rz = 1;
|
|
qz = -1;
|
|
break;
|
|
}
|
|
|
|
double w = density[ix][iy][iz] * qx * qy * qz +
|
|
density[ix][iy][jz] * qx * qy * rz +
|
|
density[ix][jy][iz] * qx * ry * qz +
|
|
density[ix][jy][jz] * qx * ry * rz +
|
|
density[jx][iy][iz] * rx * qy * qz +
|
|
density[jx][iy][jz] * rx * qy * rz +
|
|
density[jx][jy][iz] * rx * ry * qz +
|
|
density[jx][jy][jz] * rx * ry * rz;
|
|
|
|
adj_gradient[i][axis] += a_w * w * global_w;
|
|
}
|
|
|
|
template <
|
|
typename ParticleArray, typename ProjectionDensityArray,
|
|
typename GradientArray, typename PeriodicFunction, typename WeightArray>
|
|
static void adjoint(
|
|
const ParticleArray &particles, ProjectionDensityArray &density,
|
|
GradientArray &adjoint_gradient, const WeightArray &w, T Lx, T Ly, T Lz,
|
|
size_t N0, size_t N1, size_t N2, const PeriodicFunction &p, T nmean,
|
|
size_t Np) {
|
|
ConsoleContext<LOG_DEBUG> ctx("Classic CIC adjoint-projection");
|
|
|
|
T inv_dx = N0 / Lx;
|
|
T inv_dy = N1 / Ly;
|
|
T inv_dz = N2 / Lz;
|
|
T inv_nmean = 1 / nmean;
|
|
size_t minX = density.index_bases()[0], minY = density.index_bases()[1],
|
|
minZ = density.index_bases()[2],
|
|
maxX = density.index_bases()[0] + density.shape()[0],
|
|
maxY = density.index_bases()[1] + density.shape()[1],
|
|
maxZ = density.index_bases()[2] + density.shape()[2];
|
|
|
|
#pragma omp parallel for schedule(static)
|
|
for (long i = 0; i < Np; i++) {
|
|
|
|
T x = particles[i][0] * inv_dx;
|
|
T y = particles[i][1] * inv_dy;
|
|
T z = particles[i][2] * inv_dz;
|
|
|
|
size_t ix = (size_t)std::floor(x);
|
|
size_t iy = (size_t)std::floor(y);
|
|
size_t iz = (size_t)std::floor(z);
|
|
|
|
size_t jx = (ix + 1);
|
|
size_t jy = (iy + 1);
|
|
size_t jz = (iz + 1);
|
|
|
|
p(jx, jy, jz);
|
|
|
|
if (ix < minX || ix >= maxX || iy < minY || iy >= maxY || iz < minZ ||
|
|
iz >= maxZ)
|
|
continue;
|
|
|
|
__do_gradient(
|
|
adjoint_gradient, density, w[i], i, 0, ix, iy, iz, jx, jy, jz, x, y,
|
|
z, inv_dx * inv_nmean);
|
|
__do_gradient(
|
|
adjoint_gradient, density, w[i], i, 1, ix, iy, iz, jx, jy, jz, x, y,
|
|
z, inv_dy * inv_nmean);
|
|
__do_gradient(
|
|
adjoint_gradient, density, w[i], i, 2, ix, iy, iz, jx, jy, jz, x, y,
|
|
z, inv_dz * inv_nmean);
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
class OpenMPCloudInCell : public GenericCIC<T, OpenMPCloudInCell_impl<T>> {
|
|
public:
|
|
typedef T Type;
|
|
// Number of extra planes required in case of MPI
|
|
static const int MPI_PLANE_LEAKAGE = 1;
|
|
static const int MPI_NEGATIVE_PLANE_LEAKAGE = 0;
|
|
typedef CIC_Distribution<T> Distribution;
|
|
typedef CIC_Tools::Periodic_MPI Periodic_MPI;
|
|
};
|
|
|
|
} // namespace LibLSS
|
|
|
|
#endif
|