diff --git a/python/_cosmo_bispectrum.cpp b/python/_cosmo_bispectrum.cpp index 72a14a4..9e95a63 100644 --- a/python/_cosmo_bispectrum.cpp +++ b/python/_cosmo_bispectrum.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -39,6 +40,7 @@ using boost::format; struct ModeSet { size_t N1, N2, N3; + bool omp; struct TriangleIterator { @@ -78,6 +80,8 @@ struct ModeSet t.i1 -= N1; if (t.i2 >= N2/2) t.i2 -= N2; + if (t.i3 >= N3/2) + t.i3 -= N3; return t; } @@ -85,18 +89,21 @@ struct ModeSet double r1 = i1, r2 = i3, r3 = i3; return std::sqrt(r1*r1 + r2*r2 + r3*r3); } + void reverse() { i1=-i1; i2=-i2; i3=-i3; } TriangleIterator& operator*() { return *this; } }; - ModeSet(size_t N1_, size_t N2_, size_t N3_) - : N1(N1_), N2(N2_), N3(N3_) {} + ModeSet(size_t N1_, size_t N2_, size_t N3_, bool do_openmp = false) + : N1(N1_), N2(N2_), N3(N3_),omp(do_openmp) { + } + TriangleIterator begin() const { TriangleIterator t; t.i1 = t.i2 = t.i3 = 0; - t.N1 = N1; t.N2 = N2; t.N3 = N3; + t.N1 = N1; return t; } @@ -112,6 +119,9 @@ struct ModeSet }; +template +static T no_conj(const T& a) { return a; } + extern "C" DLL_PUBLIC void CosmoTool_compute_bispectrum( double *delta_hat, size_t Nx, size_t Ny, size_t Nz, @@ -120,50 +130,75 @@ void CosmoTool_compute_bispectrum( { // First remap to multi_array for easy access size_t kNz = Nz/2+1; + int Ntasks = omp_get_max_threads(); boost::multi_array_ref, 3> a_delta(reinterpret_cast*>(delta_hat), boost::extents[Nx][Ny][kNz]); boost::multi_array_ref a_Nt(Ntriangles, boost::extents[Nk][Nk][Nk]); boost::multi_array_ref, 3> a_B(reinterpret_cast*>(B), boost::extents[Nk][Nk][Nk]); + boost::multi_array, 4> b_B(boost::extents[Ntasks][Nk][Nk][Nk]); + boost::multi_array b_Nt(boost::extents[Ntasks][Nk][Nk][Nk]); typedef std::complex CType; // First loop over m1 - for (auto m1 : ModeSet(Nx, Ny, kNz)) { - CType& v1 = a_delta[m1.i1][m1.i2][m1.i3]; +#pragma omp parallel +{ +#pragma omp single +{ + for (auto m1 : ModeSet(Nx, Ny, Nz, true)) { +int tid = omp_get_thread_num(); +#pragma omp task +{ + CType v1 = (m1.i3 >= kNz) ? std::conj(a_delta[m1.i1][m1.i2][(m1.N3-m1.i3)%m1.N3]) : a_delta[m1.i1][m1.i2][m1.i3]; + auto rm1 = m1.real(); // Second mode m2 - for (auto m2 : ModeSet(Nx, Ny, kNz)) { + for (auto m2 : ModeSet(Nx, Ny, Nz)) { // Now derive m3 - CType& v2 = a_delta[m2.i1][m2.i2][m2.i3]; - auto m3 = (m1.real()+m2.real()); + CType v2 = (m2.i3 >= kNz) ? std::conj(a_delta[m2.i1][m2.i2][(m2.N3-m2.i3)%m2.N3]) : a_delta[m2.i1][m2.i2][m2.i3]; + auto rm2 = m2.real(); + auto m3 = (rm1+rm2); if (!m3.in_box()) continue; - size_t q1 = std::floor(m1.norm()/delta_k); - size_t q2 = std::floor(m2.norm()/delta_k); + size_t q1 = std::floor(rm1.norm()/delta_k); + size_t q2 = std::floor(rm2.norm()/delta_k); size_t q3 = std::floor(m3.norm()/delta_k); - - if (q1 > Nk || q2 > Nk || q3 > Nk) + if (q1 >= Nk || q2 >= Nk || q3 >= Nk) continue; CType prod = v1*v2; + bool do_conj = false; // We use hermitic symmetry to get -m3, it is just the mode in m3 but conjugated. + m3.reverse(); if (m3.i3 > 0) { - if (m3.i1 < 0) m3.i1 += m3.N1; - if (m3.i2 < 0) m3.i2 += m3.N2; - prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]); + assert(m3.i3 < kNz); } else { - m3.i1 = -m3.i1; - m3.i2 = -m3.i2; - if (m3.i1 < 0) m3.i1 += m3.N1; - if (m3.i2 < 0) m3.i2 += m3.N2; - prod *= a_delta[m3.i1][m3.i2][m3.i3]; - } + m3.i3 = -m3.i3; + do_conj = !do_conj; + } + m3.i1 = (m3.N1 - m1.i1)%m3.N1; + m3.i2 = (m3.N2 - m3.i2)%m3.N2; -// a_Nt[q1][q2][q3] ++; -// a_B[q1][q2][q3] += prod; + if (do_conj) + prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]); + else + prod *= (a_delta[m3.i1][m3.i2][m3.i3]); + + b_Nt[tid][q1][q2][q3] ++; + b_B[tid][q1][q2][q3] += prod; } } } +} +} + + for (int tid = 0; tid < Ntasks; tid++) + for (auto m1 : ModeSet(Nk, Nk, Nk)) { + a_Nt[m1.i1][m1.i2][m1.i3] += b_Nt[tid][m1.i1][m1.i2][m1.i3]; + a_B[m1.i1][m1.i2][m1.i3] += b_B[tid][m1.i1][m1.i2][m1.i3]; + } + +} extern "C" DLL_PUBLIC @@ -185,10 +220,10 @@ void CosmoTool_compute_powerspectrum( size_t q1 = std::floor(m1.norm()/delta_k); - if (q1 > Nk) + if (q1 >= Nk) continue; a_Nc[q1] ++; - a_P[q1] += std::norm(v1); + a_P[q1] += std::norm(v1); } } diff --git a/python/cosmotool/bispectrum.py b/python/cosmotool/bispectrum.py index 729796b..3b4bd90 100644 --- a/python/cosmotool/bispectrum.py +++ b/python/cosmotool/bispectrum.py @@ -11,6 +11,11 @@ try: double *delta_hat, size_t Nx, size_t Ny, size_t Nz, size_t *Ntriangles, double* B, double delta_k, size_t Nk ) ; + void CosmoTool_compute_powerspectrum( + double *delta_hat, size_t Nx, size_t Ny, size_t Nz, + size_t *Ncounts, + double* P, double delta_k, size_t Nk ); + """); _pathlib = os.path.dirname(os.path.abspath(__file__)) @@ -42,6 +47,7 @@ def bispectrum(delta, delta_k, Nk, fourier=True): if not fourier: delta = np.fft.rfftn(delta) N1,N2,N3 = delta.shape + rN3 = (N3-1)*2 delta_hat_buf = np.empty((N1*N2*N3*2),dtype=np.double) delta_hat_buf[::2] = delta.real.ravel() delta_hat_buf[1::2] = delta.imag.ravel() @@ -54,16 +60,16 @@ def bispectrum(delta, delta_k, Nk, fourier=True): else: raise RuntimeError("Internal error, do not know how to map size_t") - B_buf = np.zeros((Nk*Nk*Nk*4), dtype=np.double) + B_buf = np.zeros((Nk*Nk*Nk*2), dtype=np.double) _lib.CosmoTool_compute_bispectrum( \ _ffi.cast("double *", delta_hat_buf.ctypes.data), \ - N1, N2, N3, \ + N1, N2, rN3, \ _ffi.cast("size_t *", triangle_buf.ctypes.data), \ _ffi.cast("double *", B_buf.ctypes.data), \ delta_k, \ Nk) - B_buf = B_buf.reshape((Nk,Nk,Nk,4)) + B_buf = B_buf.reshape((Nk,Nk,Nk,2)) return triangle_buf, B_buf[...,0]+1j*B_buf[...,1] def powerspectrum(delta, delta_k, Nk, fourier=True): @@ -102,7 +108,7 @@ def powerspectrum(delta, delta_k, Nk, fourier=True): B_buf = np.zeros((Nk,), dtype=np.double) - _lib.CosmoTool_compute_bispectrum( \ + _lib.CosmoTool_compute_powerspectrum( \ _ffi.cast("double *", delta_hat_buf.ctypes.data), \ N1, N2, N3, \ _ffi.cast("size_t *", count_buf.ctypes.data), \ diff --git a/python_sample/test_bispectrum.py b/python_sample/test_bispectrum.py index f1f247b..241dade 100644 --- a/python_sample/test_bispectrum.py +++ b/python_sample/test_bispectrum.py @@ -1,10 +1,19 @@ import numpy as np import cosmotool as ct -f=0.01 -d=np.random.normal(size=(16,16,16)) +N=32 +f=0.10 +d=np.random.normal(size=(N,)*3) rho = d + f *(d*d - np.average(d*d)) -B = ct.bispectrum(rho, 1, 16, fourier=False) -P = ct.powerspectrum(rho, 1, 16, fourier=False) +B = ct.bispectrum(rho, 1, N, fourier=False) +P = ct.powerspectrum(rho, 1, N, fourier=False) +PP = P[1]/P[0]/N**3 + +x = PP[:,None,None] * PP[None,:,None] + PP[:,None,None]*PP[None,None,:] + PP[None,:,None]*PP[None,None,:] + +BB = B[1]/B[0]/N**6 + +y = BB/x +