diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d69ebe..e2e45f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,18 @@ set(GSL_FIND_REQUIRED TRUE) FIND_PACKAGE_HANDLE_STANDARD_ARGS(NetCDF DEFAULT_MSG NETCDF_LIBRARY NETCDFCPP_LIBRARY NETCDF_INCLUDE_PATH) FIND_PACKAGE_HANDLE_STANDARD_ARGS(GSL DEFAULT_MSG GSL_LIBRARY GSLCBLAS_LIBRARY GSL_INCLUDE_PATH) +IF(ENABLE_OPENMP) + + IF (NOT OPENMP_FOUND) + MESSAGE(ERROR "No known compiler option for enabling OpenMP") + ENDIF(NOT OPENMP_FOUND) + + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKED_FLAGS} ${OpenMP_C_FLAGS}") +ENDIF(ENABLE_OPENMP) + + set(GSL_LIBRARIES ${GSL_LIBRARY} ${GSLCBLAS_LIBRARY}) diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 64bb51f..5b58290 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -62,7 +62,7 @@ if (ENABLE_SHARP AND SHARP_LIBRARY AND SHARP_INCLUDE_PATH AND EIGEN3_FOUND) include_directories(${SHARP_INCLUDE_PATH}) add_executable(test_healpix_calls test_healpix_calls.cpp) target_link_libraries(test_healpix_calls ${tolink} ${SHARP_LIBRARIES}) - set_target_properties(test_healpix_calls PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS} LINK_FLAGS ${OpenMP_CXX_FLAGS}) + set_target_properties(test_healpix_calls PROPERTIES COMPILE_FLAGS "${OpenMP_CXX_FLAGS}" LINK_FLAGS "${OpenMP_CXX_FLAGS}") add_dependencies(test_healpix_calls sharp) endif (ENABLE_SHARP AND SHARP_LIBRARY AND SHARP_INCLUDE_PATH AND EIGEN3_FOUND) diff --git a/sample/testSmooth.cpp b/sample/testSmooth.cpp index 4f3011d..ddb8c3e 100644 --- a/sample/testSmooth.cpp +++ b/sample/testSmooth.cpp @@ -85,14 +85,16 @@ int main() uint32_t dims[] = { NX, NX }; ProgressiveOutput out = ProgressiveOutput::saveArrayProgressive("out.nc", dims, 2); +//#pragma omp parallel for schedule(static) for (uint32_t iy = 0; iy < NX; iy++) { + MySmooth::SPHState state; cout << "iy=" << iy << endl; for (uint32_t ix = 0; ix < NX; ix++) { MyTree::coords c = { 1.0*ix/NX, 1.0*iy/NX }; - smooth.fetchNeighbours(c); - out.put(smooth.computeSmoothedValue(c, unit_fun)); + smooth.fetchNeighbours(c, &state); + out.put(smooth.computeSmoothedValue(c, unit_fun, &state)); } diff --git a/src/sphSmooth.hpp b/src/sphSmooth.hpp index f6fe582..b347178 100644 --- a/src/sphSmooth.hpp +++ b/src/sphSmooth.hpp @@ -35,7 +35,7 @@ knowledge of the CeCILL license and that you accept its terms. #ifndef __COSMOTOOL_SPH_SMOOTH_HPP #define __COSMOTOOL_SPH_SMOOTH_HPP - +#include #include "config.hpp" #include "mykdtree.hpp" @@ -60,37 +60,48 @@ namespace CosmoTool typedef void (*runParticleValue)(ValType& t); public: + struct SPHState + { + boost::shared_ptr ngb; + boost::shared_ptr distances; + typename SPHTree::coords currentCenter; + int currentNgb; + ComputePrecision smoothRadius; + }; + + SPHSmooth(SPHTree *tree, uint32_t Nsph); virtual ~SPHSmooth(); - void fetchNeighbours(const typename SPHTree::coords& c); + void fetchNeighbours(const typename SPHTree::coords& c, SPHState *state = 0); + void fetchNeighbours(const typename SPHTree::coords& c, uint32_t newNsph); void fetchNeighboursOnVolume(const typename SPHTree::coords& c, ComputePrecision radius); const typename SPHTree::coords& getCurrentCenter() const { - return currentCenter; + return internal.currentCenter; } ComputePrecision computeSmoothedValue(const typename SPHTree::coords& c, - computeParticleValue fun); + computeParticleValue fun, SPHState *state = 0); ComputePrecision computeInterpolatedValue(const typename SPHTree::coords& c, - computeParticleValue fun); + computeParticleValue fun, SPHState *state = 0); ComputePrecision getMaxDistance(const typename SPHTree::coords& c, SPHNode *node) const; ComputePrecision getSmoothingLen() const { - return smoothRadius; + return internal.smoothRadius; } // TO USE WITH EXTREME CARE ! void setSmoothingLen(ComputePrecision len) { - smoothRadius = len; + internal.smoothRadius = len; } // END - void runForEachNeighbour(runParticleValue fun); + void runForEachNeighbour(runParticleValue fun, SPHState *state = 0); void addGridSite(const typename SPHTree::coords& c); bool hasNeighbours() const; @@ -100,32 +111,26 @@ namespace CosmoTool SPHTree *getTree() { return tree; } void changeNgb(uint32_t newMax) { - delete[] ngb; - delete[] distances; - ngb = new SPHCell *[newMax]; - distances = new CoordType[newMax]; + internal.ngb = boost::shared_ptr(new SPHCell *[newMax]); + internal.distances = boost::shared_ptr(new CoordType[newMax]); maxNgb = newMax; } - uint32_t getCurrent() const { return currentNgb; } + uint32_t getCurrent() const { return internal.currentNgb; } uint32_t getNgb() const { return maxNgb; } protected: - SPHCell **ngb; - CoordType *distances; + SPHState internal; uint32_t Nsph; uint32_t deltaNsph; uint32_t maxNgb; - uint32_t currentNgb; SPHTree *tree; - ComputePrecision smoothRadius; - typename SPHTree::coords currentCenter; ComputePrecision computeWValue(const typename SPHTree::coords & c, SPHCell& cell, CoordType d, - computeParticleValue fun); + computeParticleValue fun, SPHState *state); void runUnrollNode(SPHNode *node, runParticleValue fun); }; diff --git a/src/sphSmooth.tcc b/src/sphSmooth.tcc index 0cf7de8..b9c9d2e 100644 --- a/src/sphSmooth.tcc +++ b/src/sphSmooth.tcc @@ -1,4 +1,5 @@ #include +#include "algo.hpp" namespace CosmoTool { @@ -7,29 +8,27 @@ SPHSmooth::SPHSmooth(SPHTree *tree, uint32_t Nsph) { this->Nsph = Nsph; this->tree = tree; - this->currentNgb = 0; + internal.currentNgb = 0; this->maxNgb = Nsph; - ngb = new SPHCell *[maxNgb]; - distances = new CoordType[maxNgb]; + internal.ngb = boost::shared_ptr(new SPHCell *[maxNgb]); + internal.distances = boost::shared_ptr(new CoordType[maxNgb]); } template SPHSmooth::~SPHSmooth() { - delete[] ngb; - delete[] distances; } template ComputePrecision SPHSmooth::computeWValue(const typename SPHTree::coords& c, SPHCell& cell, CoordType d, - computeParticleValue fun) + computeParticleValue fun, SPHState *state) { CoordType weight; - d /= smoothRadius; + d /= state->smoothRadius; weight = getKernel(d); if (cell.val.weight != 0) @@ -47,86 +46,91 @@ SPHSmooth::fetchNeighbours(const typename SPHTree::coords& c, uin if (requested > maxNgb) { - delete[] ngb; - delete[] distances; - maxNgb = requested; - ngb = new SPHCell *[maxNgb]; - distances = new CoordType[maxNgb]; + internal.ngb = new SPHCell *[maxNgb]; + internal.distances = new CoordType[maxNgb]; } - memcpy(currentCenter, c, sizeof(c)); - tree->getNearestNeighbours(c, requested, ngb, distances); + memcpy(internal.currentCenter, c, sizeof(c)); + tree->getNearestNeighbours(c, requested, internal.ngb, internal.distances); - currentNgb = 0; - for (uint32_t i = 0; i < requested && ngb[i] != 0; i++,currentNgb++) + internal.currentNgb = 0; + for (uint32_t i = 0; i < requested && internal.ngb[i] != 0; i++,internal.currentNgb++) { - distances[i] = sqrt(distances[i]); - d2 = distances[i]; + internal.distances[i] = sqrt(internal.distances[i]); + d2 = internal.distances[i]; if (d2 > max_dist) max_dist = d2; } - smoothRadius = max_dist / 2; + internal.smoothRadius = max_dist / 2; } template -void -SPHSmooth::fetchNeighbours(const typename SPHTree::coords& c) +void SPHSmooth::fetchNeighbours(const typename SPHTree::coords& c, SPHState *state) { ComputePrecision d2, max_dist = 0; uint32_t requested = Nsph; - memcpy(currentCenter, c, sizeof(c)); - tree->getNearestNeighbours(c, requested, ngb, distances); + if (state != 0) { + state->distances = boost::shared_ptr(new CoordType[Nsph]); + state->ngb = boost::shared_ptr(new SPHCell *[Nsph]); + } else + state = &internal; + + memcpy(state->currentCenter, c, sizeof(c)); + + tree->getNearestNeighbours(c, requested, state->ngb.get(), state->distances.get()); - currentNgb = 0; - for (uint32_t i = 0; i < requested && ngb[i] != 0; i++,currentNgb++) + state->currentNgb = 0; + for (uint32_t i = 0; i < requested && state->ngb[i] != 0; i++,state->currentNgb++) { - distances[i] = sqrt(distances[i]); - d2 = distances[i]; + d2 = internal.distances[i] = sqrt(internal.distances[i]); if (d2 > max_dist) - max_dist = d2; + max_dist = d2; } - smoothRadius = max_dist / 2; -} + state->smoothRadius = max_dist / 2; +} + template void SPHSmooth::fetchNeighboursOnVolume(const typename SPHTree::coords& c, - ComputePrecision radius) + ComputePrecision radius) { uint32_t numPart; ComputePrecision d2, max_dist = 0; - memcpy(currentCenter, c, sizeof(c)); + memcpy(internal.currentCenter, c, sizeof(c)); - currentNgb = tree->getIntersection(c, radius, ngb, distances, + internal.currentNgb = tree->getIntersection(c, radius, internal.ngb, internal.distances, maxNgb); - for (uint32_t i = 0; i < currentNgb; i++) + for (uint32_t i = 0; i < internal.currentNgb; i++) { - distances[i] = sqrt(distances[i]); - d2 = distances[i]; + d2 = internal.distances[i] = sqrt(internal.distances[i]); if (d2 > max_dist) - max_dist = d2; + max_dist = d2; } - smoothRadius = max_dist / 2; + internal.smoothRadius = max_dist / 2; } template ComputePrecision SPHSmooth::computeSmoothedValue(const typename SPHTree::coords& c, - computeParticleValue fun) + computeParticleValue fun, SPHState *state) { + if (state == 0) + state = &internal; + ComputePrecision outputValue = 0; ComputePrecision max_dist = 0; - ComputePrecision r3 = smoothRadius * smoothRadius * smoothRadius; + ComputePrecision r3 = cube(state->smoothRadius); - for (uint32_t i = 0; i < currentNgb; i++) + for (uint32_t i = 0; i < state->currentNgb; i++) { - outputValue += computeWValue(c, *ngb[i], distances[i], fun); + outputValue += computeWValue(c, *state->ngb[i], state->distances[i], fun, state); } return outputValue / r3; @@ -141,27 +145,33 @@ ComputePrecision interpolateOne(const ValType& t) // WARNING ! Cell's weight must be 1 !!! template ComputePrecision SPHSmooth::computeInterpolatedValue(const typename SPHTree::coords& c, - computeParticleValue fun) + computeParticleValue fun, SPHState *state) { + if (state == 0) + state = &internal; + ComputePrecision outputValue = 0; ComputePrecision max_dist = 0; ComputePrecision weight = 0; - for (uint32_t i = 0; i < currentNgb; i++) + for (uint32_t i = 0; i < state->currentNgb; i++) { - outputValue += computeWValue(c, *ngb[i], distances[i], fun); - weight += computeWValue(c, *ngb[i], distances[i], interpolateOne); + outputValue += computeWValue(c, *state->ngb[i], state->distances[i], fun); + weight += computeWValue(c, *state->ngb[i], state->distances[i], interpolateOne); } return (outputValue == 0) ? 0 : (outputValue / weight); } template -void SPHSmooth::runForEachNeighbour(runParticleValue fun) +void SPHSmooth::runForEachNeighbour(runParticleValue fun, SPHState *state) { - for (uint32_t i = 0; i < currentNgb; i++) + if (state == 0) + state = &internal; + + for (uint32_t i = 0; i < state->currentNgb; i++) { - fun(ngb[i]); + fun(state->ngb[i]); } } @@ -172,13 +182,13 @@ void SPHSmooth::addGridSite(const typename SPHTree::coords& c) ComputePrecision outputValue = 0; ComputePrecision max_dist = 0; - ComputePrecision r3 = smoothRadius * smoothRadius * smoothRadius; + ComputePrecision r3 = cube(internal.smoothRadius); - for (uint32_t i = 0; i < currentNgb; i++) + for (uint32_t i = 0; i < internal.currentNgb; i++) { - ComputePrecision d = distances[i]; - SPHCell& cell = *ngb[i]; - cell.val.weight += getKernel(d/smoothRadius) / r3; + ComputePrecision d = internal.distances[i]; + SPHCell& cell = *internal.ngb[i]; + cell.val.weight += getKernel(d/internal.smoothRadius) / r3; } } @@ -202,7 +212,7 @@ SPHSmooth::getKernel(ComputePrecision x) const template bool SPHSmooth::hasNeighbours() const { - return (currentNgb != 0); + return (internal.currentNgb != 0); } template