#include #include #include #include #include #include using std::cout; using std::endl; using boost::format; #if defined _WIN32 || defined __CYGWIN__ #ifdef BUILDING_DLL #ifdef __GNUC__ #define DLL_PUBLIC __attribute__ ((dllexport)) #else #define DLL_PUBLIC __declspec(dllexport) // Note: actually gcc seems to also supports this syntax. #endif #else #ifdef __GNUC__ #define DLL_PUBLIC __attribute__ ((dllimport)) #else #define DLL_PUBLIC __declspec(dllimport) // Note: actually gcc seems to also supports this syntax. #endif #endif #define DLL_LOCAL #else #if __GNUC__ >= 4 #define DLL_PUBLIC __attribute__ ((visibility ("default"))) #define DLL_LOCAL __attribute__ ((visibility ("hidden"))) #else #define DLL_PUBLIC #define DLL_LOCAL #endif #endif struct ModeSet { size_t N1, N2, N3; bool omp; struct TriangleIterator { ssize_t i1, i2, i3; size_t N1, N2, N3; TriangleIterator& operator++() { i3++; if (i3==N3) { i3 = 0; i2++; } if (i2==N2) { i2 = 0; i1++; } return *this; } bool operator!=(const TriangleIterator& t) const { return i1!=t.i1 || i2!=t.i2 || i3 != t.i3; } bool in_box() const { ssize_t hN1 = N1/2, hN2 = N2/2, hN3 = N3/2; return (i1 >= -hN1) && (i1 < hN1) && (i2 >= -hN2) && (i2 < hN2) && (i3 >= -hN3) && (i3 < hN3); } TriangleIterator operator+(const TriangleIterator& other_t) const { TriangleIterator t = *this; t.i1 = (t.i1+other_t.i1); t.i2 = (t.i2+other_t.i2); t.i3 = (t.i3+other_t.i3); return t; } TriangleIterator real() const { TriangleIterator t = *this; if (t.i1 >= N1/2) t.i1 -= N1; if (t.i2 >= N2/2) t.i2 -= N2; if (t.i3 >= N3/2) t.i3 -= N3; return t; } double norm() const { double r1 = i1, r2 = i3, r3 = i3; return std::sqrt(r1*r1 + r2*r2 + r3*r3); } void reverse() { i1=-i1; i2=-i2; i3=-i3; } TriangleIterator& operator*() { return *this; } }; ModeSet(size_t N1_, size_t N2_, size_t N3_, bool do_openmp = false) : N1(N1_), N2(N2_), N3(N3_),omp(do_openmp) { } TriangleIterator begin() const { TriangleIterator t; t.i1 = t.i2 = t.i3 = 0; t.N2 = N2; t.N3 = N3; t.N1 = N1; return t; } TriangleIterator end() const { TriangleIterator t; t.i2 = t.i3 = 0; t.i1 = N1; t.N1 = N1; t.N2 = N2; t.N3 = N3; return t; } }; template static T no_conj(const T& a) { return a; } extern "C" DLL_PUBLIC void CosmoTool_compute_bispectrum( double *delta_hat, size_t Nx, size_t Ny, size_t Nz, size_t *Ntriangles, double* B, double delta_k, size_t Nk ) { // First remap to multi_array for easy access size_t kNz = Nz/2+1; int Ntasks = omp_get_max_threads(); boost::multi_array_ref, 3> a_delta(reinterpret_cast*>(delta_hat), boost::extents[Nx][Ny][kNz]); boost::multi_array_ref a_Nt(Ntriangles, boost::extents[Nk][Nk][Nk]); boost::multi_array_ref, 3> a_B(reinterpret_cast*>(B), boost::extents[Nk][Nk][Nk]); boost::multi_array, 4> b_B(boost::extents[Ntasks][Nk][Nk][Nk]); boost::multi_array b_Nt(boost::extents[Ntasks][Nk][Nk][Nk]); typedef std::complex CType; // First loop over m1 #pragma omp parallel { #pragma omp single { for (auto m1 : ModeSet(Nx, Ny, Nz, true)) { int tid = omp_get_thread_num(); #pragma omp task { CType v1 = (m1.i3 >= kNz) ? std::conj(a_delta[m1.i1][m1.i2][(m1.N3-m1.i3)%m1.N3]) : a_delta[m1.i1][m1.i2][m1.i3]; auto rm1 = m1.real(); // Second mode m2 for (auto m2 : ModeSet(Nx, Ny, Nz)) { // Now derive m3 CType v2 = (m2.i3 >= kNz) ? std::conj(a_delta[m2.i1][m2.i2][(m2.N3-m2.i3)%m2.N3]) : a_delta[m2.i1][m2.i2][m2.i3]; auto rm2 = m2.real(); auto m3 = (rm1+rm2); if (!m3.in_box()) continue; size_t q1 = std::floor(rm1.norm()/delta_k); size_t q2 = std::floor(rm2.norm()/delta_k); size_t q3 = std::floor(m3.norm()/delta_k); if (q1 >= Nk || q2 >= Nk || q3 >= Nk) continue; CType prod = v1*v2; bool do_conj = false; // We use hermitic symmetry to get -m3, it is just the mode in m3 but conjugated. m3.reverse(); if (m3.i3 > 0) { assert(m3.i3 < kNz); } else { m3.i3 = -m3.i3; do_conj = !do_conj; } m3.i1 = (m3.N1 - m1.i1)%m3.N1; m3.i2 = (m3.N2 - m3.i2)%m3.N2; if (do_conj) prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]); else prod *= (a_delta[m3.i1][m3.i2][m3.i3]); b_Nt[tid][q1][q2][q3] ++; b_B[tid][q1][q2][q3] += prod; } } } } } for (int tid = 0; tid < Ntasks; tid++) for (auto m1 : ModeSet(Nk, Nk, Nk)) { a_Nt[m1.i1][m1.i2][m1.i3] += b_Nt[tid][m1.i1][m1.i2][m1.i3]; a_B[m1.i1][m1.i2][m1.i3] += b_B[tid][m1.i1][m1.i2][m1.i3]; } } extern "C" DLL_PUBLIC void CosmoTool_compute_powerspectrum( double *delta_hat, size_t Nx, size_t Ny, size_t Nz, size_t *Ncounts, double* P, double delta_k, size_t Nk ) { // First remap to multi_array for easy access size_t kNz = Nz/2+1; boost::multi_array_ref, 3> a_delta(reinterpret_cast*>(delta_hat), boost::extents[Nx][Ny][kNz]); boost::multi_array_ref a_Nc(Ncounts, boost::extents[Nk]); boost::multi_array_ref a_P(reinterpret_cast(P), boost::extents[Nk]); typedef std::complex CType; // First loop over m1 for (auto m1 : ModeSet(Nx, Ny, kNz)) { CType& v1 = a_delta[m1.i1][m1.i2][m1.i3]; size_t q1 = std::floor(m1.norm()/delta_k); if (q1 >= Nk) continue; a_Nc[q1] ++; a_P[q1] += std::norm(v1); } }