Tweaks for speed
This commit is contained in:
parent
8ab094ad3d
commit
d875416200
@ -1,22 +1,22 @@
|
|||||||
#include "openmp.hpp"
|
|
||||||
#include "omptl/algorithm"
|
|
||||||
#include <cassert>
|
|
||||||
#include "yorick.hpp"
|
|
||||||
#include "sphSmooth.hpp"
|
|
||||||
#include "mykdtree.hpp"
|
|
||||||
#include "miniargs.hpp"
|
|
||||||
#include <H5Cpp.h>
|
|
||||||
#include "hdf5_array.hpp"
|
#include "hdf5_array.hpp"
|
||||||
#include <iostream>
|
#include "miniargs.hpp"
|
||||||
#include <boost/format.hpp>
|
#include "mykdtree.hpp"
|
||||||
|
#include "omptl/algorithm"
|
||||||
|
#include "openmp.hpp"
|
||||||
|
#include "sphSmooth.hpp"
|
||||||
|
#include "yorick.hpp"
|
||||||
|
#include <H5Cpp.h>
|
||||||
#include <boost/bind.hpp>
|
#include <boost/bind.hpp>
|
||||||
|
#include <boost/format.hpp>
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace CosmoTool;
|
using namespace CosmoTool;
|
||||||
|
|
||||||
#define N_SPH 32
|
#define N_SPH 32
|
||||||
|
|
||||||
struct VCoord{
|
struct VCoord {
|
||||||
float v[3];
|
float v[3];
|
||||||
float mass;
|
float mass;
|
||||||
};
|
};
|
||||||
@ -27,46 +27,42 @@ typedef boost::multi_array<float, 2> array_type;
|
|||||||
typedef boost::multi_array<float, 3> array3_type;
|
typedef boost::multi_array<float, 3> array3_type;
|
||||||
typedef boost::multi_array<float, 4> array4_type;
|
typedef boost::multi_array<float, 4> array4_type;
|
||||||
|
|
||||||
ComputePrecision getVelocity(const VCoord& v, int i)
|
ComputePrecision getVelocity(const VCoord &v, int i) { return v.mass * v.v[i]; }
|
||||||
{
|
|
||||||
return v.mass * v.v[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
ComputePrecision getMass(const VCoord& v)
|
ComputePrecision getMass(const VCoord &v) { return v.mass; }
|
||||||
{
|
|
||||||
return v.mass;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef SPHSmooth<VCoord> MySmooth;
|
typedef SPHSmooth<VCoord> MySmooth;
|
||||||
typedef MySmooth::SPHTree MyTree;
|
typedef MySmooth::SPHTree MyTree;
|
||||||
typedef MyTree::Cell MyCell;
|
typedef MyTree::Cell MyCell;
|
||||||
|
|
||||||
template<typename FuncT>
|
template <typename FuncT>
|
||||||
void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz,
|
void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres,
|
||||||
array3_type& bins, array3_type& arr, FuncT func, double rLimit2)
|
double cx, double cy, double cz,
|
||||||
{
|
array3_type &bins, array3_type &arr, FuncT func,
|
||||||
#pragma omp parallel
|
double rLimit2) {
|
||||||
|
int rz_max = 0;
|
||||||
|
#pragma omp parallel shared(rz_max)
|
||||||
{
|
{
|
||||||
MySmooth smooth1(tree1, N_SPH);
|
MySmooth smooth1(tree1, N_SPH);
|
||||||
|
|
||||||
#pragma omp for schedule(dynamic)
|
#pragma omp for collapse(3) schedule(dynamic)
|
||||||
for (int rz = 0; rz < Nres; rz++)
|
for (int rz = 0; rz < Nres; rz++) {
|
||||||
{
|
|
||||||
double pz = (rz)*boxsize/Nres-cz;
|
|
||||||
|
|
||||||
cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres << endl;
|
for (int ry = 0; ry < Nres; ry++) {
|
||||||
for (int ry = 0; ry < Nres; ry++)
|
for (int rx = 0; rx < Nres; rx++) {
|
||||||
{
|
if (rz > rz_max) {
|
||||||
double py = (ry)*boxsize/Nres-cy;
|
rz_max = rz;
|
||||||
for (int rx = 0; rx < Nres; rx++)
|
cout << format("[%d] %d / %d") % smp_get_thread_id() % rz % Nres
|
||||||
{
|
<< endl;
|
||||||
double px = (rx)*boxsize/Nres-cx;
|
}
|
||||||
|
double px = (rx)*boxsize / Nres - cx;
|
||||||
|
double py = (ry)*boxsize / Nres - cy;
|
||||||
|
double pz = (rz)*boxsize / Nres - cz;
|
||||||
|
|
||||||
MyTree::coords c = { float(px), float(py), float(pz) };
|
MyTree::coords c = {float(px), float(py), float(pz)};
|
||||||
|
|
||||||
double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
|
double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
|
||||||
if (r2 > rLimit2)
|
if (r2 > rLimit2) {
|
||||||
{
|
|
||||||
arr[rx][ry][rz] = 0;
|
arr[rx][ry][rz] = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -84,23 +80,20 @@ void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv) {
|
||||||
{
|
|
||||||
char *fname1, *outFile;
|
char *fname1, *outFile;
|
||||||
double rLimit, boxsize, rLimit2, cx, cy, cz;
|
double rLimit, boxsize, rLimit2, cx, cy, cz;
|
||||||
int Nres;
|
int Nres;
|
||||||
|
|
||||||
MiniArgDesc args[] = {
|
MiniArgDesc args[] = {{"INPUT DATA1", &fname1, MINIARG_STRING},
|
||||||
{ "INPUT DATA1", &fname1, MINIARG_STRING },
|
{"RADIUS LIMIT", &rLimit, MINIARG_DOUBLE},
|
||||||
{ "RADIUS LIMIT", &rLimit, MINIARG_DOUBLE },
|
{"BOXSIZE", &boxsize, MINIARG_DOUBLE},
|
||||||
{ "BOXSIZE", &boxsize, MINIARG_DOUBLE },
|
{"RESOLUTION", &Nres, MINIARG_INT},
|
||||||
{ "RESOLUTION", &Nres, MINIARG_INT },
|
{"CX", &cx, MINIARG_DOUBLE},
|
||||||
{ "CX", &cx, MINIARG_DOUBLE },
|
{"CY", &cy, MINIARG_DOUBLE},
|
||||||
{ "CY", &cy, MINIARG_DOUBLE },
|
{"CZ", &cz, MINIARG_DOUBLE},
|
||||||
{ "CZ", &cz, MINIARG_DOUBLE },
|
{"OUTPUT FILE", &outFile, MINIARG_STRING},
|
||||||
{ "OUTPUT FILE", &outFile, MINIARG_STRING },
|
{0, 0, MINIARG_NULL}};
|
||||||
{ 0, 0, MINIARG_NULL }
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!parseMiniArgs(argc, argv, args))
|
if (!parseMiniArgs(argc, argv, args))
|
||||||
return 1;
|
return 1;
|
||||||
@ -112,7 +105,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
array3_type bins(boost::extents[Nres][Nres][Nres]);
|
array3_type bins(boost::extents[Nres][Nres][Nres]);
|
||||||
|
|
||||||
rLimit2 = rLimit*rLimit;
|
rLimit2 = rLimit * rLimit;
|
||||||
|
|
||||||
hdf5_read_array(in_f, "particles", v1_data);
|
hdf5_read_array(in_f, "particles", v1_data);
|
||||||
assert(v1_data.shape()[1] == 7);
|
assert(v1_data.shape()[1] == 7);
|
||||||
@ -124,30 +117,30 @@ int main(int argc, char **argv)
|
|||||||
MyCell *allCells_1 = new MyCell[N1_points];
|
MyCell *allCells_1 = new MyCell[N1_points];
|
||||||
|
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
for (uint32_t i = 0; i < Nres*Nres*Nres; i++)
|
for (uint32_t i = 0; i < Nres * Nres * Nres; i++)
|
||||||
bins.data()[i] = 0;
|
bins.data()[i] = 0;
|
||||||
|
|
||||||
cout << "Shuffling data in cells..." << endl;
|
cout << "Shuffling data in cells..." << endl;
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
for (uint64_t i = 0 ; i < N1_points; i++)
|
for (uint64_t i = 0; i < N1_points; i++) {
|
||||||
{
|
|
||||||
for (int j = 0; j < 3; j++)
|
for (int j = 0; j < 3; j++)
|
||||||
allCells_1[i].coord[j] = v1_data[i][j];
|
allCells_1[i].coord[j] = v1_data[i][j];
|
||||||
for (int k = 0; k < 3; k++)
|
for (int k = 0; k < 3; k++)
|
||||||
allCells_1[i].val.pValue.v[k] = v1_data[i][3+k];
|
allCells_1[i].val.pValue.v[k] = v1_data[i][3 + k];
|
||||||
allCells_1[i].val.pValue.mass = v1_data[i][6];
|
allCells_1[i].val.pValue.mass = v1_data[i][6];
|
||||||
allCells_1[i].active = true;
|
allCells_1[i].active = true;
|
||||||
allCells_1[i].val.weight = 0.0;
|
allCells_1[i].val.weight = 0.0;
|
||||||
|
|
||||||
long rx = floor((allCells_1[i].coord[0]+cx)*Nres/boxsize+0.5);
|
long rx = floor((allCells_1[i].coord[0] + cx) * Nres / boxsize + 0.5);
|
||||||
long ry = floor((allCells_1[i].coord[1]+cy)*Nres/boxsize+0.5);
|
long ry = floor((allCells_1[i].coord[1] + cy) * Nres / boxsize + 0.5);
|
||||||
long rz = floor((allCells_1[i].coord[2]+cz)*Nres/boxsize+0.5);
|
long rz = floor((allCells_1[i].coord[2] + cz) * Nres / boxsize + 0.5);
|
||||||
|
|
||||||
if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
|
if (rx < 0 || rx >= Nres || ry < 0 || ry >= Nres || rz < 0 || rz >= Nres)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
//#pragma omp atomic update
|
auto &b = bins[rx][ry][rz];
|
||||||
bins[rx][ry][rz]++;
|
#pragma omp atomic
|
||||||
|
b++;
|
||||||
}
|
}
|
||||||
v1_data.resize(boost::extents[1][1]);
|
v1_data.resize(boost::extents[1][1]);
|
||||||
|
|
||||||
@ -158,32 +151,31 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
cout << "Creating smoothing filter..." << endl;
|
cout << "Creating smoothing filter..." << endl;
|
||||||
|
|
||||||
// array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
|
// array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
|
||||||
|
|
||||||
cout << "Weighing..." << endl;
|
cout << "Weighing..." << endl;
|
||||||
|
|
||||||
#pragma omp parallel
|
int rz_max = 0;
|
||||||
|
#pragma omp parallel shared(rz_max)
|
||||||
{
|
{
|
||||||
MySmooth smooth1(&tree1, N_SPH);
|
MySmooth smooth1(&tree1, N_SPH);
|
||||||
|
|
||||||
#pragma omp for schedule(dynamic)
|
#pragma omp for collapse(3) schedule(dynamic, 8)
|
||||||
for (int rz = 0; rz < Nres; rz++)
|
for (int rz = 0; rz < Nres; rz++) {
|
||||||
{
|
for (int ry = 0; ry < Nres; ry++) {
|
||||||
double pz = (rz)*boxsize/Nres-cz;
|
for (int rx = 0; rx < Nres; rx++) {
|
||||||
|
if (rz > rz_max) {
|
||||||
|
rz_max = rz;
|
||||||
(cout << rz << " / " << Nres << endl).flush();
|
(cout << rz << " / " << Nres << endl).flush();
|
||||||
for (int ry = 0; ry < Nres; ry++)
|
}
|
||||||
{
|
double pz = (rz)*boxsize / Nres - cz;
|
||||||
double py = (ry)*boxsize/Nres-cy;
|
double py = (ry)*boxsize / Nres - cy;
|
||||||
for (int rx = 0; rx < Nres; rx++)
|
double px = (rx)*boxsize / Nres - cx;
|
||||||
{
|
|
||||||
double px = (rx)*boxsize/Nres-cx;
|
|
||||||
|
|
||||||
MyTree::coords c = { float(px), float(py), float(pz) };
|
MyTree::coords c = {float(px), float(py), float(pz)};
|
||||||
|
|
||||||
double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
|
double r2 = c[0] * c[0] + c[1] * c[1] + c[2] * c[2];
|
||||||
if (r2 > rLimit2)
|
if (r2 > rLimit2) {
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,11 +184,9 @@ int main(int argc, char **argv)
|
|||||||
smooth1.fetchNeighbours(c, numInCell);
|
smooth1.fetchNeighbours(c, numInCell);
|
||||||
else
|
else
|
||||||
smooth1.fetchNeighbours(c);
|
smooth1.fetchNeighbours(c);
|
||||||
#pragma omp critical
|
|
||||||
smooth1.addGridSite(c);
|
smooth1.addGridSite(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(cout << " Done " << rz << endl).flush();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,13 +194,14 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
array3_type interpolated(boost::extents[Nres][Nres][Nres]);
|
array3_type interpolated(boost::extents[Nres][Nres][Nres]);
|
||||||
|
|
||||||
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
|
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
|
||||||
bins, interpolated, getMass, rLimit2);
|
interpolated, getMass, rLimit2);
|
||||||
hdf5_write_array(out_f, "density", interpolated);
|
hdf5_write_array(out_f, "density", interpolated);
|
||||||
//out_f.flush();
|
// out_f.flush();
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
|
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, bins,
|
||||||
bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
|
interpolated, boost::bind(getVelocity, _1, i),
|
||||||
|
rLimit2);
|
||||||
hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
|
hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,7 +192,9 @@ void SPHSmooth<ValType,Ndims>::addGridSite(const typename SPHTree::coords& c)
|
|||||||
{
|
{
|
||||||
ComputePrecision d = internal.distances[i];
|
ComputePrecision d = internal.distances[i];
|
||||||
SPHCell& cell = *(internal.ngb[i]);
|
SPHCell& cell = *(internal.ngb[i]);
|
||||||
cell.val.weight += getKernel(d/internal.smoothRadius) / r3;
|
double kernel_value = getKernel(d/internal.smoothRadius) / r3;
|
||||||
|
#pragma omp atomic
|
||||||
|
cell.val.weight += kernel_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user