Draft MPI version

This commit is contained in:
Guilhem Lavaux 2021-08-19 14:48:10 +03:00
parent c9ac439dd5
commit bf0c2e96b1
3 changed files with 152 additions and 137 deletions

View File

@ -10,6 +10,8 @@ include(FindPkgConfig)
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
include(color_msg) include(color_msg)
find_package(MPI)
option(BUILD_SHARED_LIBS "Build shared libraries." OFF) option(BUILD_SHARED_LIBS "Build shared libraries." OFF)
option(BUILD_STATIC_LIBS "Build static libraries." ON) option(BUILD_STATIC_LIBS "Build static libraries." ON)
option(ENABLE_SHARP "Enable SHARP support." ON) option(ENABLE_SHARP "Enable SHARP support." ON)
@ -74,8 +76,10 @@ SET(CPACK_PACKAGE_VERSION_MINOR "2")
SET(CPACK_PACKAGE_VERSION_PATCH "3${EXTRA_VERSION}") SET(CPACK_PACKAGE_VERSION_PATCH "3${EXTRA_VERSION}")
SET(CPACK_PACKAGE_INSTALL_DIRECTORY "CosmoToolbox-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}") SET(CPACK_PACKAGE_INSTALL_DIRECTORY "CosmoToolbox-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}")
SET(CPACK_STRIP_FILES "lib/libCosmoTool.so") SET(CPACK_STRIP_FILES "lib/libCosmoTool.so")
SET(CPACK_SOURCE_IGNORE_FILES SET(CPACK_SOURCE_IGNORE_FILES "/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
"/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
include_directories( ${MPI_C_INCLUDE_PATH})
add_subdirectory(src) add_subdirectory(src)
add_subdirectory(sample) add_subdirectory(sample)

View File

@ -87,7 +87,7 @@ if (Boost_FOUND)
ENDIF (YORICK_SUPPORT) ENDIF (YORICK_SUPPORT)
if (HDF5_FOUND) if (HDF5_FOUND)
add_executable(simple3DFilter simple3DFilter.cpp) add_executable(simple3DFilter simple3DFilter.cpp)
target_link_libraries(simple3DFilter ${tolink}) target_link_libraries(simple3DFilter ${tolink} ${MPI_C_LIBRARIES})
add_executable(simpleDistanceFilter simpleDistanceFilter.cpp) add_executable(simpleDistanceFilter simpleDistanceFilter.cpp)
target_link_libraries(simpleDistanceFilter ${tolink}) target_link_libraries(simpleDistanceFilter ${tolink})
@ -101,7 +101,7 @@ if (Boost_FOUND)
add_executable(gadgetToArray gadgetToArray.cpp) add_executable(gadgetToArray gadgetToArray.cpp)
target_link_libraries(gadgetToArray ${tolink}) target_link_libraries(gadgetToArray ${tolink})
add_executable(testHDF5 testHDF5.cpp) add_executable(testHDF5 testHDF5.cpp)
target_link_libraries(testHDF5 ${tolink}) target_link_libraries(testHDF5 ${tolink})

View File

@ -1,22 +1,25 @@
#include "openmp.hpp"
#include "omptl/algorithm"
#include <cassert>
#include "yorick.hpp"
#include "sphSmooth.hpp"
#include "mykdtree.hpp"
#include "miniargs.hpp"
#include <H5Cpp.h> #include <H5Cpp.h>
#include "hdf5_array.hpp" #include <mpi.h>
#include <iostream>
#include <boost/format.hpp>
#include <boost/bind.hpp> #include <boost/bind.hpp>
#include <boost/format.hpp>
#include <cassert>
#include <iostream>
#include "hdf5_array.hpp"
#include "miniargs.hpp"
#include "mykdtree.hpp"
#include "omptl/algorithm"
#include "openmp.hpp"
#include "sphSmooth.hpp"
#include "yorick.hpp"
using namespace std; using namespace std;
using namespace CosmoTool; using namespace CosmoTool;
#define N_SPH 32 #define N_SPH 32
struct VCoord{ struct VCoord {
float v[3]; float v[3];
float mass; float mass;
}; };
@ -27,130 +30,124 @@ typedef boost::multi_array<float, 2> array_type;
typedef boost::multi_array<float, 3> array3_type; typedef boost::multi_array<float, 3> array3_type;
typedef boost::multi_array<float, 4> array4_type; typedef boost::multi_array<float, 4> array4_type;
ComputePrecision getVelocity(const VCoord& v, int i) ComputePrecision getVelocity(const VCoord& v, int i) { return v.mass * v.v[i]; }
{
return v.mass * v.v[i];
}
ComputePrecision getMass(const VCoord& v) ComputePrecision getMass(const VCoord& v) { return v.mass; }
{
return v.mass;
}
typedef SPHSmooth<VCoord> MySmooth; typedef SPHSmooth<VCoord> MySmooth;
typedef MySmooth::SPHTree MyTree; typedef MySmooth::SPHTree MyTree;
typedef MyTree::Cell MyCell; typedef MyTree::Cell MyCell;
template<typename FuncT> template <typename FuncT>
void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz, void computeInterpolatedField(MyTree* tree1, double boxsize, int Nres,
array3_type& bins, array3_type& arr, FuncT func, double rLimit2) double cx, double cy, double cz,
{ array3_type& bins, array3_type& arr, FuncT func,
double rLimit2) {
#pragma omp parallel #pragma omp parallel
{ {
MySmooth smooth1(tree1, N_SPH); MySmooth smooth1(tree1, N_SPH);
#pragma omp for schedule(dynamic)
for (int rz = 0; rz < Nres; rz++)
{
double pz = (rz)*boxsize/Nres-cz;
cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl; #pragma omp for schedule(dynamic)
for (int ry = 0; ry < Nres; ry++) for (int rz = 0; rz < Nres; rz++) {
{ double pz = (rz)*boxsize / Nres - cz;
double py = (ry)*boxsize/Nres-cy;
for (int rx = 0; rx < Nres; rx++)
{
double px = (rx)*boxsize/Nres-cx;
MyTree::coords c = { float(px), float(py), float(pz) };
double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2]; cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl;
if (r2 > rLimit2) for (int ry = 0; ry < Nres; ry++) {
{ double py = (ry)*boxsize / Nres - cy;
arr[rx][ry][rz] = 0; for (int rx = 0; rx < Nres; rx++) {
continue; double px = (rx)*boxsize / Nres - cx;
}
uint32_t numInCell = bins[rx][ry][rz]; MyTree::coords c = {float(px), float(py), float(pz)};
if (numInCell > N_SPH)
smooth1.fetchNeighbours(c, numInCell);
else
smooth1.fetchNeighbours(c);
arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func); double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
} if (r2 > rLimit2) {
arr[rx][ry][rz] = 0;
continue;
} }
uint32_t numInCell = bins[rx][ry][rz];
if (numInCell > N_SPH)
smooth1.fetchNeighbours(c, numInCell);
else
smooth1.fetchNeighbours(c);
arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func);
}
} }
}
} }
} }
int main(int argc, char **argv) int main(int argc, char** argv) {
{ int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
if (provided < MPI_THREAD_FUNNELED) {
std::cerr << "Cannot mix MPI and Threads here. Please recompile with "
"OpenMP or MPI switched off."
<< std::endl;
MPI_Abort(MPI_COMM_WORLD, 99);
}
char *fname1, *fname2; char *fname1, *fname2;
double rLimit, boxsize, rLimit2, cx, cy, cz; double rLimit, boxsize, rLimit2, cx, cy, cz;
int Nres; int Nres;
MiniArgDesc args[] = { MiniArgDesc args[] = {{"INPUT DATA1", &fname1, MINIARG_STRING},
{ "INPUT DATA1", &fname1, MINIARG_STRING }, {"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE},
{ "RADIUS LIMIT", &rLimit, MINIARG_DOUBLE }, {"BOXSIZE", &boxsize, MINIARG_DOUBLE},
{ "BOXSIZE", &boxsize, MINIARG_DOUBLE }, {"RESOLUTION", &Nres, MINIARG_INT},
{ "RESOLUTION", &Nres, MINIARG_INT }, {"CX", &cx, MINIARG_DOUBLE},
{ "CX", &cx, MINIARG_DOUBLE }, {"CY", &cy, MINIARG_DOUBLE},
{ "CY", &cy, MINIARG_DOUBLE }, {"CZ", &cz, MINIARG_DOUBLE},
{ "CZ", &cz, MINIARG_DOUBLE }, {0, 0, MINIARG_NULL}};
{ 0, 0, MINIARG_NULL }
};
if (!parseMiniArgs(argc, argv, args)) if (!parseMiniArgs(argc, argv, args)) return 1;
return 1;
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
H5::H5File in_f(fname1, 0); H5::H5File in_f(fname1, 0);
H5::H5File out_f("fields.h5", H5F_ACC_TRUNC); H5::H5File out_f(boost::str(boost::format("fields_%d.h5", rank), H5F_ACC_TRUNC);
array_type v1_data; array_type v1_data;
uint32_t N1_points, N2_points; uint32_t N1_points, N2_points;
array3_type bins(boost::extents[Nres][Nres][Nres]); array3_type bins(boost::extents[Nres][Nres][Nres]);
rLimit2 = rLimit*rLimit; rLimit2 = rLimit * rLimit;
hdf5_read_array(in_f, "particles", v1_data); hdf5_read_array(in_f, "particles", v1_data);
assert(v1_data.shape()[1] == 7); assert(v1_data.shape()[1] == 7);
N1_points = v1_data.shape()[0]; N1_points = v1_data.shape()[0];
cout << "Got " << N1_points << " in the first file." << endl; cout << "Got " << N1_points << " in the first file." << endl;
MyCell *allCells_1 = new MyCell[N1_points]; MyCell* allCells_1 = new MyCell[N1_points];
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < Nres*Nres*Nres; i++) for (long i = 0; i < Nres * Nres * Nres; i++) bins.data()[i] = 0;
bins.data()[i] = 0;
cout << "Shuffling data in cells..." << endl; cout << "Shuffling data in cells..." << endl;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int i = 0 ; i < N1_points; i++) for (int i = 0; i < N1_points; i++) {
{ for (int j = 0; j < 3; j++) allCells_1[i].coord[j] = v1_data[i][j];
for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++)
allCells_1[i].coord[j] = v1_data[i][j]; allCells_1[i].val.pValue.v[k] = v1_data[i][3 + k];
for (int k = 0; k < 3; k++) allCells_1[i].val.pValue.mass = v1_data[i][6];
allCells_1[i].val.pValue.v[k] = v1_data[i][3+k]; allCells_1[i].active = true;
allCells_1[i].val.pValue.mass = v1_data[i][6]; allCells_1[i].val.weight = 0.0;
allCells_1[i].active = true;
allCells_1[i].val.weight = 0.0;
long rx = floor((allCells_1[i].coord[0]+cx)*Nres/boxsize+0.5); long rx = floor((allCells_1[i].coord[0] + cx) * Nres / boxsize + 0.5);
long ry = floor((allCells_1[i].coord[1]+cy)*Nres/boxsize+0.5); long ry = floor((allCells_1[i].coord[1] + cy) * Nres / boxsize + 0.5);
long rz = floor((allCells_1[i].coord[2]+cz)*Nres/boxsize+0.5); long rz = floor((allCells_1[i].coord[2] + cz) * Nres / boxsize + 0.5);
if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres) if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
continue; continue;
//#pragma omp atomic update //#pragma omp atomic update
bins[rx][ry][rz]++; bins[rx][ry][rz]++;
} }
v1_data.resize(boost::extents[1][1]); v1_data.resize(boost::extents[1][1]);
hdf5_write_array(out_f, "num_in_cell", bins); hdf5_write_array(out_f, "num_in_cell", bins);
cout << "Building trees..." << endl; cout << "Building trees..." << endl;
@ -158,61 +155,75 @@ int main(int argc, char **argv)
cout << "Creating smoothing filter..." << endl; cout << "Creating smoothing filter..." << endl;
// array3_type out_rad_1(boost::extents[Nres][Nres][Nres]); // array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
cout << "Weighing..." << endl; cout << "Weighing..." << endl;
#pragma omp parallel #pragma omp parallel
{ {
MySmooth smooth1(&tree1, N_SPH); MySmooth smooth1(&tree1, N_SPH);
#pragma omp for schedule(dynamic)
for (int rz = 0; rz < Nres; rz++)
{
double pz = (rz)*boxsize/Nres-cz;
(cout << rz << " / " << Nres << endl).flush(); #pragma omp for schedule(dynamic)
for (int ry = 0; ry < Nres; ry++) for (int rz = 0; rz < Nres; rz++) {
{ double pz = (rz)*boxsize / Nres - cz;
double py = (ry)*boxsize/Nres-cy;
for (int rx = 0; rx < Nres; rx++)
{
double px = (rx)*boxsize/Nres-cx;
MyTree::coords c = { float(px), float(py), float(pz) };
double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2]; (cout << rz << " / " << Nres << endl).flush();
if (r2 > rLimit2) for (int ry = 0; ry < Nres; ry++) {
{ double py = (ry)*boxsize / Nres - cy;
continue; for (int rx = 0; rx < Nres; rx++) {
} double px = (rx)*boxsize / Nres - cx;
uint32_t numInCell = bins[rx][ry][rz]; MyTree::coords c = {float(px), float(py), float(pz)};
if (numInCell > N_SPH)
smooth1.fetchNeighbours(c, numInCell); double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
else if (r2 > rLimit2) {
smooth1.fetchNeighbours(c); continue;
#pragma omp critical
smooth1.addGridSite(c);
}
} }
(cout << " Done " << rz << endl).flush();
} uint32_t numInCell = bins[rx][ry][rz];
if (numInCell > N_SPH)
smooth1.fetchNeighbours(c, numInCell);
else
smooth1.fetchNeighbours(c);
#pragma omp critical
smooth1.addGridSite(c);
}
}
(cout << " Done " << rz << endl).flush();
}
} }
//
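// Reduce the per-cell weights (cell.val.weight) of the tree across all MPI ranks.
// MPI_Allreduce operates on contiguous buffers, so the weights are first copied
// into a temporary 1-D array, summed in place, and then copied back to the cells.
// NOTE(review): `smooth1` is declared inside the parallel region above and the
// visible code only declares `allCells_1`, not `allCells` -- confirm the names used below.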
auto tree = smooth1.getTree();
auto nodes = tree->getAllNodes();
double *weight_array = new double[N1_points];
for (size_t c = 0; c < N1_points; c++) {
weight_array[c] = allCells[c].val.weight;
}
MPI_Allreduce(MPI_IN_PLACE, weight_array, N1_points, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
for (size_t c = 0; c < N1_points; c++) {
allCells[c].val.weight = weight_array[c];
}
delete[] weight_array;
// Summary: cell weights -> 1-D array of the particle weights -> MPI_Allreduce -> write the summed weights back to the particles.
//
cout << "Interpolating..." << endl; cout << "Interpolating..." << endl;
array3_type interpolated(boost::extents[Nres][Nres][Nres]); array3_type interpolated(boost::extents[Nres][Nres][Nres]);
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
bins, interpolated, getMass, rLimit2); interpolated, getMass, rLimit2);
hdf5_write_array(out_f, "density", interpolated); hdf5_write_array(out_f, "density", interpolated);
//out_f.flush(); // out_f.flush();
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2); interpolated, boost::bind(getVelocity, _1, i),
hdf5_write_array(out_f, str(format("p%d") % i), interpolated); rLimit2);
hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
} }
return 0; return 0;
}; };