Added parallelization. Fixes
This commit is contained in:
parent
3d67b66ae0
commit
9ef2b008b0
3 changed files with 83 additions and 33 deletions
|
@ -1,3 +1,4 @@
|
|||
#include <omp.h>
|
||||
#include <iostream>
|
||||
#include <boost/format.hpp>
|
||||
#include <boost/multi_array.hpp>
|
||||
|
@ -39,6 +40,7 @@ using boost::format;
|
|||
struct ModeSet
|
||||
{
|
||||
size_t N1, N2, N3;
|
||||
bool omp;
|
||||
|
||||
struct TriangleIterator
|
||||
{
|
||||
|
@ -78,6 +80,8 @@ struct ModeSet
|
|||
t.i1 -= N1;
|
||||
if (t.i2 >= N2/2)
|
||||
t.i2 -= N2;
|
||||
if (t.i3 >= N3/2)
|
||||
t.i3 -= N3;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -85,18 +89,21 @@ struct ModeSet
|
|||
double r1 = i1, r2 = i3, r3 = i3;
|
||||
return std::sqrt(r1*r1 + r2*r2 + r3*r3);
|
||||
}
|
||||
void reverse() { i1=-i1; i2=-i2; i3=-i3; }
|
||||
|
||||
TriangleIterator& operator*() { return *this; }
|
||||
};
|
||||
|
||||
ModeSet(size_t N1_, size_t N2_, size_t N3_)
|
||||
: N1(N1_), N2(N2_), N3(N3_) {}
|
||||
ModeSet(size_t N1_, size_t N2_, size_t N3_, bool do_openmp = false)
|
||||
: N1(N1_), N2(N2_), N3(N3_),omp(do_openmp) {
|
||||
}
|
||||
|
||||
TriangleIterator begin() const {
|
||||
TriangleIterator t;
|
||||
t.i1 = t.i2 = t.i3 = 0;
|
||||
t.N1 = N1;
|
||||
t.N2 = N2;
|
||||
t.N3 = N3;
|
||||
t.N1 = N1;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -112,6 +119,9 @@ struct ModeSet
|
|||
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
static T no_conj(const T& a) { return a; }
|
||||
|
||||
extern "C" DLL_PUBLIC
|
||||
void CosmoTool_compute_bispectrum(
|
||||
double *delta_hat, size_t Nx, size_t Ny, size_t Nz,
|
||||
|
@ -120,50 +130,75 @@ void CosmoTool_compute_bispectrum(
|
|||
{
|
||||
// First remap to multi_array for easy access
|
||||
size_t kNz = Nz/2+1;
|
||||
int Ntasks = omp_get_max_threads();
|
||||
boost::multi_array_ref<std::complex<double>, 3> a_delta(reinterpret_cast<std::complex<double>*>(delta_hat), boost::extents[Nx][Ny][kNz]);
|
||||
boost::multi_array_ref<size_t, 3> a_Nt(Ntriangles, boost::extents[Nk][Nk][Nk]);
|
||||
boost::multi_array_ref<std::complex<double>, 3> a_B(reinterpret_cast<std::complex<double>*>(B), boost::extents[Nk][Nk][Nk]);
|
||||
boost::multi_array<std::complex<double>, 4> b_B(boost::extents[Ntasks][Nk][Nk][Nk]);
|
||||
boost::multi_array<size_t, 4> b_Nt(boost::extents[Ntasks][Nk][Nk][Nk]);
|
||||
typedef std::complex<double> CType;
|
||||
|
||||
// First loop over m1
|
||||
for (auto m1 : ModeSet(Nx, Ny, kNz)) {
|
||||
CType& v1 = a_delta[m1.i1][m1.i2][m1.i3];
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp single
|
||||
{
|
||||
for (auto m1 : ModeSet(Nx, Ny, Nz, true)) {
|
||||
int tid = omp_get_thread_num();
|
||||
#pragma omp task
|
||||
{
|
||||
CType v1 = (m1.i3 >= kNz) ? std::conj(a_delta[m1.i1][m1.i2][(m1.N3-m1.i3)%m1.N3]) : a_delta[m1.i1][m1.i2][m1.i3];
|
||||
auto rm1 = m1.real();
|
||||
// Second mode m2
|
||||
for (auto m2 : ModeSet(Nx, Ny, kNz)) {
|
||||
for (auto m2 : ModeSet(Nx, Ny, Nz)) {
|
||||
// Now derive m3
|
||||
CType& v2 = a_delta[m2.i1][m2.i2][m2.i3];
|
||||
auto m3 = (m1.real()+m2.real());
|
||||
CType v2 = (m2.i3 >= kNz) ? std::conj(a_delta[m2.i1][m2.i2][(m2.N3-m2.i3)%m2.N3]) : a_delta[m2.i1][m2.i2][m2.i3];
|
||||
auto rm2 = m2.real();
|
||||
auto m3 = (rm1+rm2);
|
||||
|
||||
if (!m3.in_box())
|
||||
continue;
|
||||
|
||||
size_t q1 = std::floor(m1.norm()/delta_k);
|
||||
size_t q2 = std::floor(m2.norm()/delta_k);
|
||||
size_t q1 = std::floor(rm1.norm()/delta_k);
|
||||
size_t q2 = std::floor(rm2.norm()/delta_k);
|
||||
size_t q3 = std::floor(m3.norm()/delta_k);
|
||||
|
||||
|
||||
if (q1 > Nk || q2 > Nk || q3 > Nk)
|
||||
if (q1 >= Nk || q2 >= Nk || q3 >= Nk)
|
||||
continue;
|
||||
|
||||
CType prod = v1*v2;
|
||||
bool do_conj = false;
|
||||
// We use hermitic symmetry to get -m3, it is just the mode in m3 but conjugated.
|
||||
m3.reverse();
|
||||
if (m3.i3 > 0) {
|
||||
if (m3.i1 < 0) m3.i1 += m3.N1;
|
||||
if (m3.i2 < 0) m3.i2 += m3.N2;
|
||||
prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]);
|
||||
assert(m3.i3 < kNz);
|
||||
} else {
|
||||
m3.i1 = -m3.i1;
|
||||
m3.i2 = -m3.i2;
|
||||
if (m3.i1 < 0) m3.i1 += m3.N1;
|
||||
if (m3.i2 < 0) m3.i2 += m3.N2;
|
||||
prod *= a_delta[m3.i1][m3.i2][m3.i3];
|
||||
}
|
||||
m3.i3 = -m3.i3;
|
||||
do_conj = !do_conj;
|
||||
}
|
||||
m3.i1 = (m3.N1 - m1.i1)%m3.N1;
|
||||
m3.i2 = (m3.N2 - m3.i2)%m3.N2;
|
||||
|
||||
// a_Nt[q1][q2][q3] ++;
|
||||
// a_B[q1][q2][q3] += prod;
|
||||
if (do_conj)
|
||||
prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]);
|
||||
else
|
||||
prod *= (a_delta[m3.i1][m3.i2][m3.i3]);
|
||||
|
||||
b_Nt[tid][q1][q2][q3] ++;
|
||||
b_B[tid][q1][q2][q3] += prod;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int tid = 0; tid < Ntasks; tid++)
|
||||
for (auto m1 : ModeSet(Nk, Nk, Nk)) {
|
||||
a_Nt[m1.i1][m1.i2][m1.i3] += b_Nt[tid][m1.i1][m1.i2][m1.i3];
|
||||
a_B[m1.i1][m1.i2][m1.i3] += b_B[tid][m1.i1][m1.i2][m1.i3];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
extern "C" DLL_PUBLIC
|
||||
|
@ -185,10 +220,10 @@ void CosmoTool_compute_powerspectrum(
|
|||
|
||||
size_t q1 = std::floor(m1.norm()/delta_k);
|
||||
|
||||
if (q1 > Nk)
|
||||
if (q1 >= Nk)
|
||||
continue;
|
||||
|
||||
a_Nc[q1] ++;
|
||||
a_P[q1] += std::norm(v1);
|
||||
a_P[q1] += std::norm(v1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,11 @@ try:
|
|||
double *delta_hat, size_t Nx, size_t Ny, size_t Nz,
|
||||
size_t *Ntriangles,
|
||||
double* B, double delta_k, size_t Nk ) ;
|
||||
void CosmoTool_compute_powerspectrum(
|
||||
double *delta_hat, size_t Nx, size_t Ny, size_t Nz,
|
||||
size_t *Ncounts,
|
||||
double* P, double delta_k, size_t Nk );
|
||||
|
||||
""");
|
||||
|
||||
_pathlib = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@ -42,6 +47,7 @@ def bispectrum(delta, delta_k, Nk, fourier=True):
|
|||
if not fourier:
|
||||
delta = np.fft.rfftn(delta)
|
||||
N1,N2,N3 = delta.shape
|
||||
rN3 = (N3-1)*2
|
||||
delta_hat_buf = np.empty((N1*N2*N3*2),dtype=np.double)
|
||||
delta_hat_buf[::2] = delta.real.ravel()
|
||||
delta_hat_buf[1::2] = delta.imag.ravel()
|
||||
|
@ -54,16 +60,16 @@ def bispectrum(delta, delta_k, Nk, fourier=True):
|
|||
else:
|
||||
raise RuntimeError("Internal error, do not know how to map size_t")
|
||||
|
||||
B_buf = np.zeros((Nk*Nk*Nk*4), dtype=np.double)
|
||||
B_buf = np.zeros((Nk*Nk*Nk*2), dtype=np.double)
|
||||
|
||||
_lib.CosmoTool_compute_bispectrum( \
|
||||
_ffi.cast("double *", delta_hat_buf.ctypes.data), \
|
||||
N1, N2, N3, \
|
||||
N1, N2, rN3, \
|
||||
_ffi.cast("size_t *", triangle_buf.ctypes.data), \
|
||||
_ffi.cast("double *", B_buf.ctypes.data), \
|
||||
delta_k, \
|
||||
Nk)
|
||||
B_buf = B_buf.reshape((Nk,Nk,Nk,4))
|
||||
B_buf = B_buf.reshape((Nk,Nk,Nk,2))
|
||||
return triangle_buf, B_buf[...,0]+1j*B_buf[...,1]
|
||||
|
||||
def powerspectrum(delta, delta_k, Nk, fourier=True):
|
||||
|
@ -102,7 +108,7 @@ def powerspectrum(delta, delta_k, Nk, fourier=True):
|
|||
|
||||
B_buf = np.zeros((Nk,), dtype=np.double)
|
||||
|
||||
_lib.CosmoTool_compute_bispectrum( \
|
||||
_lib.CosmoTool_compute_powerspectrum( \
|
||||
_ffi.cast("double *", delta_hat_buf.ctypes.data), \
|
||||
N1, N2, N3, \
|
||||
_ffi.cast("size_t *", count_buf.ctypes.data), \
|
||||
|
|
|
@ -1,10 +1,19 @@
|
|||
import numpy as np
|
||||
import cosmotool as ct
|
||||
|
||||
f=0.01
|
||||
d=np.random.normal(size=(16,16,16))
|
||||
N=32
|
||||
f=0.10
|
||||
d=np.random.normal(size=(N,)*3)
|
||||
rho = d + f *(d*d - np.average(d*d))
|
||||
|
||||
B = ct.bispectrum(rho, 1, 16, fourier=False)
|
||||
P = ct.powerspectrum(rho, 1, 16, fourier=False)
|
||||
B = ct.bispectrum(rho, 1, N, fourier=False)
|
||||
P = ct.powerspectrum(rho, 1, N, fourier=False)
|
||||
PP = P[1]/P[0]/N**3
|
||||
|
||||
x = PP[:,None,None] * PP[None,:,None] + PP[:,None,None]*PP[None,None,:] + PP[None,:,None]*PP[None,None,:]
|
||||
|
||||
BB = B[1]/B[0]/N**6
|
||||
|
||||
y = BB/x
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue