Draft MPI version

2021-08-19 14:48:10 +03:00
3 changed files with 152 additions and 137 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -10,6 +10,8 @@ include(FindPkgConfig)
 include(FindPackageHandleStandardArgs)
 include(color_msg)

+find_package(MPI)
+
 option(BUILD_SHARED_LIBS "Build shared libraries." OFF)
 option(BUILD_STATIC_LIBS "Build static libraries." ON)
 option(ENABLE_SHARP "Enable SHARP support." ON)
@ -74,8 +76,10 @@ SET(CPACK_PACKAGE_VERSION_MINOR "2")
 SET(CPACK_PACKAGE_VERSION_PATCH "3${EXTRA_VERSION}")
 SET(CPACK_PACKAGE_INSTALL_DIRECTORY "CosmoToolbox-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}")
 SET(CPACK_STRIP_FILES "lib/libCosmoTool.so")
-SET(CPACK_SOURCE_IGNORE_FILES 
-"/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
+SET(CPACK_SOURCE_IGNORE_FILES "/CVS/;/\\\\.git/;/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;cscope.*;/CMakeFiles/;.*\\\\.cmake;Makefile")
+
+include_directories( ${MPI_C_INCLUDE_PATH})
+

 add_subdirectory(src)
 add_subdirectory(sample)
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@ -87,7 +87,7 @@ if (Boost_FOUND)
  ENDIF (YORICK_SUPPORT)
  if (HDF5_FOUND)
    add_executable(simple3DFilter simple3DFilter.cpp)
-    target_link_libraries(simple3DFilter ${tolink})
+    target_link_libraries(simple3DFilter ${tolink}  ${MPI_C_LIBRARIES})

    add_executable(simpleDistanceFilter simpleDistanceFilter.cpp)
    target_link_libraries(simpleDistanceFilter ${tolink})
--- a/sample/simple3DFilter.cpp
+++ b/sample/simple3DFilter.cpp
@ -1,15 +1,18 @@
-#include "openmp.hpp"
-#include "omptl/algorithm"
-#include <cassert>
-#include "yorick.hpp"
-#include "sphSmooth.hpp"
-#include "mykdtree.hpp"
-#include "miniargs.hpp"
 #include <H5Cpp.h>
-#include "hdf5_array.hpp"
-#include <iostream>
-#include <boost/format.hpp>
+#include <mpi.h>
+
 #include <boost/bind.hpp>
+#include <boost/format.hpp>
+#include <cassert>
+#include <iostream>
+
+#include "hdf5_array.hpp"
+#include "miniargs.hpp"
+#include "mykdtree.hpp"
+#include "omptl/algorithm"
+#include "openmp.hpp"
+#include "sphSmooth.hpp"
+#include "yorick.hpp"

 using namespace std;
 using namespace CosmoTool;
@ -27,46 +30,37 @@ typedef boost::multi_array<float, 2> array_type;
 typedef boost::multi_array<float, 3> array3_type;
 typedef boost::multi_array<float, 4> array4_type;

-ComputePrecision getVelocity(const VCoord& v, int i)
-{
-  return v.mass * v.v[i];
-}
+ComputePrecision getVelocity(const VCoord& v, int i) { return v.mass * v.v[i]; }

-ComputePrecision getMass(const VCoord& v)
-{
-  return v.mass;
-}
+ComputePrecision getMass(const VCoord& v) { return v.mass; }

 typedef SPHSmooth<VCoord> MySmooth;
 typedef MySmooth::SPHTree MyTree;
 typedef MyTree::Cell MyCell;

 template <typename FuncT>
-void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz,
-                              array3_type& bins, array3_type& arr, FuncT func, double rLimit2)
-{
+void computeInterpolatedField(MyTree* tree1, double boxsize, int Nres,
+                              double cx, double cy, double cz,
+                              array3_type& bins, array3_type& arr, FuncT func,
+                              double rLimit2) {
 #pragma omp parallel
  {
    MySmooth smooth1(tree1, N_SPH);

 #pragma omp for schedule(dynamic)
-    for (int rz = 0; rz < Nres; rz++)
-      {
+    for (int rz = 0; rz < Nres; rz++) {
      double pz = (rz)*boxsize / Nres - cz;

      cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl;
-        for (int ry = 0; ry < Nres; ry++)
-          {
+      for (int ry = 0; ry < Nres; ry++) {
        double py = (ry)*boxsize / Nres - cy;
-            for (int rx = 0; rx < Nres; rx++)
-              {
+        for (int rx = 0; rx < Nres; rx++) {
          double px = (rx)*boxsize / Nres - cx;

          MyTree::coords c = {float(px), float(py), float(pz)};

          double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
-                if (r2 > rLimit2)
-                  {
+          if (r2 > rLimit2) {
            arr[rx][ry][rz] = 0;
            continue;
          }
@ -84,29 +78,35 @@ void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx
  }
 }

-int main(int argc, char **argv)
-{
-
+int main(int argc, char** argv) {
+  int provided;
+  MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
+  if (provided < MPI_THREAD_FUNNELED) {
+    std::cerr << "Cannot mix MPI and Threads here. Please recompile with "
+                 "OpenMP or MPI switched off."
+              << std::endl;
+    MPI_Abort(MPI_COMM_WORLD, 99);
+  }
  char *fname1, *fname2;
  double rLimit, boxsize, rLimit2, cx, cy, cz;
  int Nres;

-  MiniArgDesc args[] = {
-    { "INPUT DATA1", &fname1, MINIARG_STRING },
+  MiniArgDesc args[] = {{"INPUT DATA1", &fname1, MINIARG_STRING},
                        {"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE},
                        {"BOXSIZE", &boxsize, MINIARG_DOUBLE},
                        {"RESOLUTION", &Nres, MINIARG_INT},
                        {"CX", &cx, MINIARG_DOUBLE},
                        {"CY", &cy, MINIARG_DOUBLE},
                        {"CZ", &cz, MINIARG_DOUBLE},
-    { 0, 0, MINIARG_NULL }
-  };
+                        {0, 0, MINIARG_NULL}};

-  if (!parseMiniArgs(argc, argv, args))
-    return 1;
+  if (!parseMiniArgs(argc, argv, args)) { MPI_Finalize(); return 1; }  // MPI was initialised above: finalize before exiting

+  int rank, size;  // rank selects this process's output file (fields_<rank>.h5)
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
  H5::H5File in_f(fname1, 0);
-  H5::H5File out_f("fields.h5", H5F_ACC_TRUNC);
+  H5::H5File out_f(boost::str(boost::format("fields_%d.h5") % rank), H5F_ACC_TRUNC);
  array_type v1_data;
  uint32_t N1_points, N2_points;

@ -124,15 +124,12 @@ int main(int argc, char **argv)
  MyCell* allCells_1 = new MyCell[N1_points];

 #pragma omp parallel for schedule(static)
-  for (long i = 0; i < Nres*Nres*Nres; i++)
-    bins.data()[i] = 0;
+  for (long i = 0; i < Nres * Nres * Nres; i++) bins.data()[i] = 0;

  cout << "Shuffling data in cells..." << endl;
 #pragma omp parallel for schedule(static)
-  for (int i = 0 ; i < N1_points; i++)
-    {
-      for (int j = 0; j < 3; j++)
-        allCells_1[i].coord[j] = v1_data[i][j];
+  for (int i = 0; i < N1_points; i++) {
+    for (int j = 0; j < 3; j++) allCells_1[i].coord[j] = v1_data[i][j];
    for (int k = 0; k < 3; k++)
      allCells_1[i].val.pValue.v[k] = v1_data[i][3 + k];
    allCells_1[i].val.pValue.mass = v1_data[i][6];
@ -167,23 +164,19 @@ int main(int argc, char **argv)
    MySmooth smooth1(&tree1, N_SPH);

 #pragma omp for schedule(dynamic)
-    for (int rz = 0; rz < Nres; rz++)
-     {
+    for (int rz = 0; rz < Nres; rz++) {
      double pz = (rz)*boxsize / Nres - cz;

      (cout << rz << " / " << Nres << endl).flush();
-        for (int ry = 0; ry < Nres; ry++)
-          {
+      for (int ry = 0; ry < Nres; ry++) {
        double py = (ry)*boxsize / Nres - cy;
-            for (int rx = 0; rx < Nres; rx++)
-              {
+        for (int rx = 0; rx < Nres; rx++) {
          double px = (rx)*boxsize / Nres - cx;

          MyTree::coords c = {float(px), float(py), float(pz)};

          double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
-                if (r2 > rLimit2)
-                  {
+          if (r2 > rLimit2) {
            continue;
          }

@ -199,18 +192,36 @@ int main(int argc, char **argv)
      (cout << " Done " << rz << endl).flush();
    }
  }
+  //
+  // Sum the per-cell weights over all MPI ranks so that every rank ends up
+  // holding the same global weights. MPI_Allreduce acts on contiguous arrays,
+  // so the weights are copied into a temporary buffer, reduced in place, and
+  // then written back to the cells.
+  double *weight_array = new double[N1_points];
+  for (size_t c = 0; c < N1_points; c++) {
+    weight_array[c] = allCells_1[c].val.weight;
+  }
+  MPI_Allreduce(MPI_IN_PLACE, weight_array, N1_points, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for (size_t c = 0; c < N1_points; c++) {
+    allCells_1[c].val.weight = weight_array[c];
+  }
+  delete[] weight_array;
+
+  // NOTE(review): assumes val.weight converts to/from double and that the tree refers to allCells_1 in place - confirm.
+  //

  cout << "Interpolating..." << endl;

  array3_type interpolated(boost::extents[Nres][Nres][Nres]);

-  computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-                           bins, interpolated, getMass, rLimit2);
+  computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
+                           interpolated, getMass, rLimit2);
  hdf5_write_array(out_f, "density", interpolated);
  // out_f.flush();
  for (int i = 0; i < 3; i++) {
-      computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-                               bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
+    computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
+                             interpolated, boost::bind(getVelocity, _1, i),
+                             rLimit2);
    hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
  }