Tweaks for speed

This commit is contained in:
Guilhem Lavaux 2022-01-29 08:22:08 +01:00
parent 8ab094ad3d
commit d875416200
2 changed files with 128 additions and 135 deletions

View File

@ -1,15 +1,15 @@
#include "openmp.hpp"
#include "omptl/algorithm"
#include <cassert>
#include "yorick.hpp"
#include "sphSmooth.hpp"
#include "mykdtree.hpp"
#include "miniargs.hpp"
#include <H5Cpp.h>
#include "hdf5_array.hpp" #include "hdf5_array.hpp"
#include <iostream> #include "miniargs.hpp"
#include <boost/format.hpp> #include "mykdtree.hpp"
#include "omptl/algorithm"
#include "openmp.hpp"
#include "sphSmooth.hpp"
#include "yorick.hpp"
#include <H5Cpp.h>
#include <boost/bind.hpp> #include <boost/bind.hpp>
#include <boost/format.hpp>
#include <cassert>
#include <iostream>
using namespace std; using namespace std;
using namespace CosmoTool; using namespace CosmoTool;
@ -27,46 +27,42 @@ typedef boost::multi_array<float, 2> array_type;
typedef boost::multi_array<float, 3> array3_type; typedef boost::multi_array<float, 3> array3_type;
typedef boost::multi_array<float, 4> array4_type; typedef boost::multi_array<float, 4> array4_type;
ComputePrecision getVelocity(const VCoord& v, int i) ComputePrecision getVelocity(const VCoord &v, int i) { return v.mass * v.v[i]; }
{
return v.mass * v.v[i];
}
ComputePrecision getMass(const VCoord& v) ComputePrecision getMass(const VCoord &v) { return v.mass; }
{
return v.mass;
}
typedef SPHSmooth<VCoord> MySmooth; typedef SPHSmooth<VCoord> MySmooth;
typedef MySmooth::SPHTree MyTree; typedef MySmooth::SPHTree MyTree;
typedef MyTree::Cell MyCell; typedef MyTree::Cell MyCell;
template <typename FuncT> template <typename FuncT>
void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz, void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres,
array3_type& bins, array3_type& arr, FuncT func, double rLimit2) double cx, double cy, double cz,
{ array3_type &bins, array3_type &arr, FuncT func,
#pragma omp parallel double rLimit2) {
int rz_max = 0;
#pragma omp parallel shared(rz_max)
{ {
MySmooth smooth1(tree1, N_SPH); MySmooth smooth1(tree1, N_SPH);
#pragma omp for schedule(dynamic) #pragma omp for collapse(3) schedule(dynamic)
for (int rz = 0; rz < Nres; rz++) for (int rz = 0; rz < Nres; rz++) {
{
double pz = (rz)*boxsize/Nres-cz;
cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl; for (int ry = 0; ry < Nres; ry++) {
for (int ry = 0; ry < Nres; ry++) for (int rx = 0; rx < Nres; rx++) {
{ if (rz > rz_max) {
double py = (ry)*boxsize/Nres-cy; rz_max = rz;
for (int rx = 0; rx < Nres; rx++) cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres
{ << endl;
}
double px = (rx)*boxsize / Nres - cx; double px = (rx)*boxsize / Nres - cx;
double py = (ry)*boxsize / Nres - cy;
double pz = (rz)*boxsize / Nres - cz;
MyTree::coords c = {float(px), float(py), float(pz)}; MyTree::coords c = {float(px), float(py), float(pz)};
double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2]; double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
if (r2 > rLimit2) if (r2 > rLimit2) {
{
arr[rx][ry][rz] = 0; arr[rx][ry][rz] = 0;
continue; continue;
} }
@ -84,14 +80,12 @@ void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx
} }
} }
int main(int argc, char **argv) int main(int argc, char **argv) {
{
char *fname1, *outFile; char *fname1, *outFile;
double rLimit, boxsize, rLimit2, cx, cy, cz; double rLimit, boxsize, rLimit2, cx, cy, cz;
int Nres; int Nres;
MiniArgDesc args[] = { MiniArgDesc args[] = {{"INPUT DATA1", &fname1, MINIARG_STRING},
{ "INPUT DATA1", &fname1, MINIARG_STRING },
{"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE}, {"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE},
{"BOXSIZE", &boxsize, MINIARG_DOUBLE}, {"BOXSIZE", &boxsize, MINIARG_DOUBLE},
{"RESOLUTION", &Nres, MINIARG_INT}, {"RESOLUTION", &Nres, MINIARG_INT},
@ -99,8 +93,7 @@ int main(int argc, char **argv)
{"CY", &cy, MINIARG_DOUBLE}, {"CY", &cy, MINIARG_DOUBLE},
{"CZ", &cz, MINIARG_DOUBLE}, {"CZ", &cz, MINIARG_DOUBLE},
{"OUTPUT FILE", &outFile, MINIARG_STRING}, {"OUTPUT FILE", &outFile, MINIARG_STRING},
{ 0, 0, MINIARG_NULL } {0, 0, MINIARG_NULL}};
};
if (!parseMiniArgs(argc, argv, args)) if (!parseMiniArgs(argc, argv, args))
return 1; return 1;
@ -129,8 +122,7 @@ int main(int argc, char **argv)
cout << "Shuffling data in cells..." << endl; cout << "Shuffling data in cells..." << endl;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (uint64_t i = 0 ; i < N1_points; i++) for (uint64_t i = 0; i < N1_points; i++) {
{
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
allCells_1[i].coord[j] = v1_data[i][j]; allCells_1[i].coord[j] = v1_data[i][j];
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
@ -146,8 +138,9 @@ int main(int argc, char **argv)
if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres) if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
continue; continue;
//#pragma omp atomic update auto &b = bins[rx][ry][rz];
bins[rx][ry][rz]++; #pragma omp atomic
b++;
} }
v1_data.resize(boost::extents[1][1]); v1_data.resize(boost::extents[1][1]);
@ -162,28 +155,27 @@ int main(int argc, char **argv)
cout << "Weighing..." << endl; cout << "Weighing..." << endl;
#pragma omp parallel int rz_max = 0;
#pragma omp parallel shared(rz_max)
{ {
MySmooth smooth1(&tree1, N_SPH); MySmooth smooth1(&tree1, N_SPH);
#pragma omp for schedule(dynamic) #pragma omp for collapse(3) schedule(dynamic, 8)
for (int rz = 0; rz < Nres; rz++) for (int rz = 0; rz < Nres; rz++) {
{ for (int ry = 0; ry < Nres; ry++) {
double pz = (rz)*boxsize/Nres-cz; for (int rx = 0; rx < Nres; rx++) {
if (rz > rz_max) {
rz_max = rz;
(cout << rz << " / " << Nres << endl).flush(); (cout << rz << " / " << Nres << endl).flush();
for (int ry = 0; ry < Nres; ry++) }
{ double pz = (rz)*boxsize / Nres - cz;
double py = (ry)*boxsize / Nres - cy; double py = (ry)*boxsize / Nres - cy;
for (int rx = 0; rx < Nres; rx++)
{
double px = (rx)*boxsize / Nres - cx; double px = (rx)*boxsize / Nres - cx;
MyTree::coords c = {float(px), float(py), float(pz)}; MyTree::coords c = {float(px), float(py), float(pz)};
double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2]; double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
if (r2 > rLimit2) if (r2 > rLimit2) {
{
continue; continue;
} }
@ -192,11 +184,9 @@ int main(int argc, char **argv)
smooth1.fetchNeighbours(c, numInCell); smooth1.fetchNeighbours(c, numInCell);
else else
smooth1.fetchNeighbours(c); smooth1.fetchNeighbours(c);
#pragma omp critical
smooth1.addGridSite(c); smooth1.addGridSite(c);
} }
} }
(cout << " Done " << rz << endl).flush();
} }
} }
@ -204,13 +194,14 @@ int main(int argc, char **argv)
array3_type interpolated(boost::extents[Nres][Nres][Nres]); array3_type interpolated(boost::extents[Nres][Nres][Nres]);
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
bins, interpolated, getMass, rLimit2); interpolated, getMass, rLimit2);
hdf5_write_array(out_f, "density", interpolated); hdf5_write_array(out_f, "density", interpolated);
// out_f.flush(); // out_f.flush();
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2); interpolated, boost::bind(getVelocity, _1, i),
rLimit2);
hdf5_write_array(out_f, str(format("p%d") % i), interpolated); hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
} }

View File

@ -192,7 +192,9 @@ void SPHSmooth<ValType,Ndims>::addGridSite(const typename SPHTree::coords& c)
{ {
ComputePrecision d = internal.distances[i]; ComputePrecision d = internal.distances[i];
SPHCell& cell = *(internal.ngb[i]); SPHCell& cell = *(internal.ngb[i]);
cell.val.weight += getKernel(d/internal.smoothRadius) / r3; double kernel_value = getKernel(d/internal.smoothRadius) / r3;
#pragma omp atomic
cell.val.weight += kernel_value;
} }
} }