Add MPI support to the simple3DFilter sample.

  * CMakeLists.txt: look up MPI and put its C headers on the include path.
  * sample/CMakeLists.txt: link simple3DFilter against the MPI C libraries.
  * sample/simple3DFilter.cpp: reformat the file, initialise MPI with
    MPI_THREAD_FUNNELED, write one fields_<rank>.h5 per rank and sum the
    per-particle SPH weights across ranks with MPI_Allreduce.

Corrections folded into the added lines of simple3DFilter.cpp:
  * the output file name is built with boost::format(...) % rank (the
    previous call passed rank to the format constructor and had unbalanced
    parentheses);
  * the weight reduction reads and writes allCells_1 (allCells is not
    declared) and no longer touches smooth1, which is out of scope after the
    parallel region;
  * MPI_Finalize() is called on both exits of main().

NOTE(review): header names, template arguments, blank lines and the
indentation of unchanged lines were restored from a copy of this patch that
had lost them. Check the context lines against the tree before applying
(git apply --ignore-whitespace may be needed). The post-image id on the
simple3DFilter.cpp index line predates the corrections listed above.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7b3581..4aa704c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,8 @@ include(FindPkgConfig)
 include(FindPackageHandleStandardArgs)
 include(color_msg)
 
+find_package(MPI)
+
 option(BUILD_SHARED_LIBS "Build shared libraries." OFF)
 option(BUILD_STATIC_LIBS "Build static libraries." ON)
 option(ENABLE_SHARP "Enable SHARP support." ON)
@@ -74,8 +76,10 @@ SET(CPACK_PACKAGE_VERSION_MINOR "2")
 SET(CPACK_PACKAGE_VERSION_PATCH "3${EXTRA_VERSION}")
 SET(CPACK_PACKAGE_INSTALL_DIRECTORY "CosmoToolbox-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}")
 SET(CPACK_STRIP_FILES "lib/libCosmoTool.so")
-SET(CPACK_SOURCE_IGNORE_FILES
-"/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
+SET(CPACK_SOURCE_IGNORE_FILES "/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
+
+include_directories(${MPI_C_INCLUDE_PATH})
+
 add_subdirectory(src)
 add_subdirectory(sample)
 
diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt
index 03709a7..839b37f 100644
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -87,7 +87,7 @@ if (Boost_FOUND)
   ENDIF (YORICK_SUPPORT)
   if (HDF5_FOUND)
     add_executable(simple3DFilter simple3DFilter.cpp)
-    target_link_libraries(simple3DFilter ${tolink})
+    target_link_libraries(simple3DFilter ${tolink} ${MPI_C_LIBRARIES})
 
     add_executable(simpleDistanceFilter simpleDistanceFilter.cpp)
     target_link_libraries(simpleDistanceFilter ${tolink})
@@ -101,7 +101,7 @@ if (Boost_FOUND)
 
     add_executable(gadgetToArray gadgetToArray.cpp)
     target_link_libraries(gadgetToArray ${tolink})
-    
+
     add_executable(testHDF5 testHDF5.cpp)
     target_link_libraries(testHDF5 ${tolink})
 
diff --git a/sample/simple3DFilter.cpp b/sample/simple3DFilter.cpp
index 000c6b7..ace6ecb 100644
--- a/sample/simple3DFilter.cpp
+++ b/sample/simple3DFilter.cpp
@@ -1,22 +1,25 @@
-#include "openmp.hpp"
-#include "omptl/algorithm"
-#include <cassert>
-#include "yorick.hpp"
-#include "sphSmooth.hpp"
-#include "mykdtree.hpp"
-#include "miniargs.hpp"
 #include <H5Cpp.h>
-#include "hdf5_array.hpp"
-#include <iostream>
-#include <boost/format.hpp>
+#include <mpi.h>
+
 #include <boost/bind.hpp>
+#include <boost/format.hpp>
+#include <cassert>
+#include <iostream>
+
+#include "hdf5_array.hpp"
+#include "miniargs.hpp"
+#include "mykdtree.hpp"
+#include "omptl/algorithm"
+#include "openmp.hpp"
+#include "sphSmooth.hpp"
+#include "yorick.hpp"
 
 using namespace std;
 using namespace CosmoTool;
 
 #define N_SPH 32
 
-struct VCoord{
+struct VCoord {
   float v[3];
   float mass;
 };
@@ -27,130 +30,128 @@ typedef boost::multi_array<float, 2> array_type;
 typedef boost::multi_array<float, 3> array3_type;
 typedef boost::multi_array<float, 4> array4_type;
 
-ComputePrecision getVelocity(const VCoord& v, int i)
-{
-  return v.mass * v.v[i];
-}
+ComputePrecision getVelocity(const VCoord& v, int i) { return v.mass * v.v[i]; }
 
-ComputePrecision getMass(const VCoord& v)
-{
-  return v.mass;
-}
+ComputePrecision getMass(const VCoord& v) { return v.mass; }
 
 typedef SPHSmooth<VCoord> MySmooth;
 typedef MySmooth::SPHTree MyTree;
 typedef MyTree::Cell MyCell;
 
-template<typename FuncT>
-void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz,
-                              array3_type& bins, array3_type& arr, FuncT func, double rLimit2)
-{
+template <typename FuncT>
+void computeInterpolatedField(MyTree* tree1, double boxsize, int Nres,
+                              double cx, double cy, double cz,
+                              array3_type& bins, array3_type& arr, FuncT func,
+                              double rLimit2) {
 #pragma omp parallel
   {
     MySmooth smooth1(tree1, N_SPH);
-    
-#pragma omp for schedule(dynamic)
-    for (int rz = 0; rz < Nres; rz++)
-      {
-        double pz = (rz)*boxsize/Nres-cz;
-        cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl;
-        for (int ry = 0; ry < Nres; ry++)
-          {
-            double py = (ry)*boxsize/Nres-cy;
-            for (int rx = 0; rx < Nres; rx++)
-              {
-                double px = (rx)*boxsize/Nres-cx;
-                
-                MyTree::coords c = { float(px), float(py), float(pz) };
+#pragma omp for schedule(dynamic)
+    for (int rz = 0; rz < Nres; rz++) {
+      double pz = (rz)*boxsize / Nres - cz;
 
-                double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
-                if (r2 > rLimit2)
-                  {
-                    arr[rx][ry][rz] = 0;
-                    continue;
-                  }
+      cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl;
+      for (int ry = 0; ry < Nres; ry++) {
+        double py = (ry)*boxsize / Nres - cy;
+        for (int rx = 0; rx < Nres; rx++) {
+          double px = (rx)*boxsize / Nres - cx;
 
-                uint32_t numInCell = bins[rx][ry][rz];
-                if (numInCell > N_SPH)
-                  smooth1.fetchNeighbours(c, numInCell);
-                else
-                  smooth1.fetchNeighbours(c);
+          MyTree::coords c = {float(px), float(py), float(pz)};
 
-                arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func);
-              }
+          double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
+          if (r2 > rLimit2) {
+            arr[rx][ry][rz] = 0;
+            continue;
           }
+
+          uint32_t numInCell = bins[rx][ry][rz];
+          if (numInCell > N_SPH)
+            smooth1.fetchNeighbours(c, numInCell);
+          else
+            smooth1.fetchNeighbours(c);
+
+          arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func);
+        }
       }
+    }
   }
 }
 
-int main(int argc, char **argv)
-{
-  
+int main(int argc, char** argv) {
+  int provided;
+  MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
+  if (provided < MPI_THREAD_FUNNELED) {
+    std::cerr << "Cannot mix MPI and Threads here. Please recompile with "
+                 "OpenMP or MPI switched off."
+              << std::endl;
+    MPI_Abort(MPI_COMM_WORLD, 99);
+  }
   char *fname1, *fname2;
   double rLimit, boxsize, rLimit2, cx, cy, cz;
   int Nres;
 
-  MiniArgDesc args[] = {
-    { "INPUT DATA1", &fname1, MINIARG_STRING },
-    { "RADIUS LIMIT", &rLimit, MINIARG_DOUBLE },
-    { "BOXSIZE", &boxsize, MINIARG_DOUBLE },
-    { "RESOLUTION", &Nres, MINIARG_INT },
-    { "CX", &cx, MINIARG_DOUBLE },
-    { "CY", &cy, MINIARG_DOUBLE },
-    { "CZ", &cz, MINIARG_DOUBLE },
-    { 0, 0, MINIARG_NULL }
-  };
+  MiniArgDesc args[] = {{"INPUT DATA1", &fname1, MINIARG_STRING},
+                        {"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE},
+                        {"BOXSIZE", &boxsize, MINIARG_DOUBLE},
+                        {"RESOLUTION", &Nres, MINIARG_INT},
+                        {"CX", &cx, MINIARG_DOUBLE},
+                        {"CY", &cy, MINIARG_DOUBLE},
+                        {"CZ", &cz, MINIARG_DOUBLE},
+                        {0, 0, MINIARG_NULL}};
 
-  if (!parseMiniArgs(argc, argv, args))
-    return 1;
+  if (!parseMiniArgs(argc, argv, args)) {
+    // MPI is already initialised here, so shut it down before bailing out.
+    MPI_Finalize();
+    return 1;
+  }
+  int rank, size;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
 
   H5::H5File in_f(fname1, 0);
-  H5::H5File out_f("fields.h5", H5F_ACC_TRUNC);
+  H5::H5File out_f(boost::str(boost::format("fields_%d.h5") % rank), H5F_ACC_TRUNC);
 
   array_type v1_data;
   uint32_t N1_points, N2_points;
-  
+
   array3_type bins(boost::extents[Nres][Nres][Nres]);
 
-  rLimit2 = rLimit*rLimit;
+  rLimit2 = rLimit * rLimit;
 
   hdf5_read_array(in_f, "particles", v1_data);
   assert(v1_data.shape()[1] == 7);
   N1_points = v1_data.shape()[0];
-  
+
   cout << "Got " << N1_points << " in the first file." << endl;
 
-  MyCell *allCells_1 = new MyCell[N1_points];
-  
+  MyCell* allCells_1 = new MyCell[N1_points];
+
 #pragma omp parallel for schedule(static)
-  for (long i = 0; i < Nres*Nres*Nres; i++)
-    bins.data()[i] = 0;
+  for (long i = 0; i < Nres * Nres * Nres; i++) bins.data()[i] = 0;
 
   cout << "Shuffling data in cells..." << endl;
 
 #pragma omp parallel for schedule(static)
-  for (int i = 0 ; i < N1_points; i++)
-    {
-      for (int j = 0; j < 3; j++)
-        allCells_1[i].coord[j] = v1_data[i][j];
-      for (int k = 0; k < 3; k++)
-        allCells_1[i].val.pValue.v[k] = v1_data[i][3+k];
-      allCells_1[i].val.pValue.mass = v1_data[i][6];
-      allCells_1[i].active = true;
-      allCells_1[i].val.weight = 0.0;
+  for (int i = 0; i < N1_points; i++) {
+    for (int j = 0; j < 3; j++) allCells_1[i].coord[j] = v1_data[i][j];
+    for (int k = 0; k < 3; k++)
+      allCells_1[i].val.pValue.v[k] = v1_data[i][3 + k];
+    allCells_1[i].val.pValue.mass = v1_data[i][6];
+    allCells_1[i].active = true;
+    allCells_1[i].val.weight = 0.0;
 
-      long rx = floor((allCells_1[i].coord[0]+cx)*Nres/boxsize+0.5);
-      long ry = floor((allCells_1[i].coord[1]+cy)*Nres/boxsize+0.5);
-      long rz = floor((allCells_1[i].coord[2]+cz)*Nres/boxsize+0.5);
-      
-      if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
-        continue;
-      
-//#pragma omp atomic update
-      bins[rx][ry][rz]++;
-    }
+    long rx = floor((allCells_1[i].coord[0] + cx) * Nres / boxsize + 0.5);
+    long ry = floor((allCells_1[i].coord[1] + cy) * Nres / boxsize + 0.5);
+    long rz = floor((allCells_1[i].coord[2] + cz) * Nres / boxsize + 0.5);
+
+    if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
+      continue;
+
+    //#pragma omp atomic update
+    bins[rx][ry][rz]++;
+  }
   v1_data.resize(boost::extents[1][1]);
-  
+
   hdf5_write_array(out_f, "num_in_cell", bins);
 
   cout << "Building trees..." << endl;
@@ -158,61 +159,77 @@ int main(int argc, char **argv)
 
   cout << "Creating smoothing filter..." << endl;
 
-//  array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
-  
+  // array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
+
   cout << "Weighing..." << endl;
 
 #pragma omp parallel
   {
     MySmooth smooth1(&tree1, N_SPH);
-    
-#pragma omp for schedule(dynamic)
-    for (int rz = 0; rz < Nres; rz++)
-      {
-        double pz = (rz)*boxsize/Nres-cz;
-        (cout << rz << " / " << Nres << endl).flush();
-        for (int ry = 0; ry < Nres; ry++)
-          {
-            double py = (ry)*boxsize/Nres-cy;
-            for (int rx = 0; rx < Nres; rx++)
-              {
-                double px = (rx)*boxsize/Nres-cx;
-                
-                MyTree::coords c = { float(px), float(py), float(pz) };
+#pragma omp for schedule(dynamic)
+    for (int rz = 0; rz < Nres; rz++) {
+      double pz = (rz)*boxsize / Nres - cz;
 
-                double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
-                if (r2 > rLimit2)
-                  {
-                    continue;
-                  }
+      (cout << rz << " / " << Nres << endl).flush();
+      for (int ry = 0; ry < Nres; ry++) {
+        double py = (ry)*boxsize / Nres - cy;
+        for (int rx = 0; rx < Nres; rx++) {
+          double px = (rx)*boxsize / Nres - cx;
 
-                uint32_t numInCell = bins[rx][ry][rz];
-                if (numInCell > N_SPH)
-                  smooth1.fetchNeighbours(c, numInCell);
-                else
-                  smooth1.fetchNeighbours(c);
-#pragma omp critical
-                smooth1.addGridSite(c);
-              }
+          MyTree::coords c = {float(px), float(py), float(pz)};
+
+          double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
+          if (r2 > rLimit2) {
+            continue;
           }
-        (cout << " Done " << rz << endl).flush();
-      }
+
+          uint32_t numInCell = bins[rx][ry][rz];
+          if (numInCell > N_SPH)
+            smooth1.fetchNeighbours(c, numInCell);
+          else
+            smooth1.fetchNeighbours(c);
+#pragma omp critical
+          smooth1.addGridSite(c);
+        }
+      }
+      (cout << " Done " << rz << endl).flush();
+    }
   }
-  
+  //
+  // Reduction on the cell.weight in the tree.
+  // MPI_Allreduce to act on contiguous arrays.
+  // NOTE(review): every rank runs the full rz loop above, so MPI_SUM appears
+  // to scale each weight by the communicator size -- confirm whether the
+  // grid is meant to be partitioned by rank.
+  double *weight_array = new double[N1_points];
+  for (size_t c = 0; c < N1_points; c++) {
+    weight_array[c] = allCells_1[c].val.weight;
+  }
+  MPI_Allreduce(MPI_IN_PLACE, weight_array, N1_points, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for (size_t c = 0; c < N1_points; c++) {
+    allCells_1[c].val.weight = weight_array[c];
+  }
+  delete[] weight_array;
+
+  // cell.weights -> build a 1d array of the particles weight -> MPI_Allreduce -> resend the new weights to the particles
+  //
+
   cout << "Interpolating..." << endl;
 
   array3_type interpolated(boost::extents[Nres][Nres][Nres]);
-  
-  computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-                           bins, interpolated, getMass, rLimit2);
+
+  computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
+                           interpolated, getMass, rLimit2);
   hdf5_write_array(out_f, "density", interpolated);
-  //out_f.flush();
+  // out_f.flush();
 
   for (int i = 0; i < 3; i++) {
-      computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-                               bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
-      hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
+    computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
+                             interpolated, boost::bind(getVelocity, _1, i),
+                             rLimit2);
+    hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
   }
-  
+
+  MPI_Finalize();
   return 0;
 };