From 019480c0e0cbc08dd01748eaa81f35c51d01821a Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Mon, 15 May 2017 16:03:17 +0200 Subject: [PATCH] Add support for Python3.5, More bispectrum work --- python/CMakeLists.txt | 4 +- python/_cosmo_bispectrum.cpp | 227 ++++++++++++++++++------------- python/cosmotool/__init__.py | 10 +- python/cosmotool/bispectrum.py | 2 +- python/cosmotool/borg.py | 36 ++--- python_sample/test_bispectrum.py | 23 ++-- 6 files changed, 172 insertions(+), 130 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ede9c5b..63c5312 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -65,10 +65,10 @@ endif() # Discover where to put packages if (NOT PYTHON_SITE_PACKAGES) - execute_process (COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import get_python_lib; print get_python_lib()" OUTPUT_VARIABLE internal_PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process (COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" OUTPUT_VARIABLE internal_PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE) SET(SYSTEM_PYTHON_SITE_PACKAGES ${internal_PYTHON_SITE_PACKAGES} CACHE PATH "Path to the target system-wide site-package where to install python modules") - execute_process (COMMAND ${PYTHON_EXECUTABLE} -c "from site import USER_SITE; print USER_SITE" OUTPUT_VARIABLE internal_PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process (COMMAND ${PYTHON_EXECUTABLE} -c "from site import USER_SITE; print(USER_SITE)" OUTPUT_VARIABLE internal_PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE) SET(USER_PYTHON_SITE_PACKAGES ${internal_PYTHON_SITE_PACKAGES} CACHE PATH "Path to the target user site-package where to install python modules") mark_as_advanced(USER_PYTHON_SITE_PACKAGES SYSTEM_PYTHON_SITE_PACKAGES) diff --git a/python/_cosmo_bispectrum.cpp b/python/_cosmo_bispectrum.cpp index 9e95a63..94e6491 100644 --- a/python/_cosmo_bispectrum.cpp +++ b/python/_cosmo_bispectrum.cpp @@ -4,54 +4,31 @@ #include #include #include +#include "symbol_visible.hpp" +#include "algo.hpp" using std::cout; using std::endl; using boost::format; - - -#if defined _WIN32 || defined __CYGWIN__ - #ifdef BUILDING_DLL - #ifdef __GNUC__ - #define DLL_PUBLIC __attribute__ ((dllexport)) - #else - #define DLL_PUBLIC __declspec(dllexport) // Note: actually gcc seems to also supports this syntax. - #endif - #else - #ifdef __GNUC__ - #define DLL_PUBLIC __attribute__ ((dllimport)) - #else - #define DLL_PUBLIC __declspec(dllimport) // Note: actually gcc seems to also supports this syntax. - #endif - #endif - #define DLL_LOCAL -#else - #if __GNUC__ >= 4 - #define DLL_PUBLIC __attribute__ ((visibility ("default"))) - #define DLL_LOCAL __attribute__ ((visibility ("hidden"))) - #else - #define DLL_PUBLIC - #define DLL_LOCAL - #endif -#endif - +using CosmoTool::square; struct ModeSet { - size_t N1, N2, N3; - bool omp; + ssize_t N1, N2, N3; + bool half_copy; struct TriangleIterator { ssize_t i1, i2, i3; - size_t N1, N2, N3; + ssize_t N1, N2, N3; + ssize_t first_iteration; TriangleIterator& operator++() { i3++; - if (i3==N3) { i3 = 0; i2++; } - if (i2==N2) { i2 = 0; i1++; } + if (i3==(N3/2+1)) { i3 = first_iteration; i2++; } + if (i2==(N2/2+1)) { i2 = -N2/2; i1++; } return *this; } @@ -62,7 +39,7 @@ struct ModeSet bool in_box() const { ssize_t hN1 = N1/2, hN2 = N2/2, hN3 = N3/2; - return (i1 >= -hN1) && (i1 < hN1) && (i2 >= -hN2) && (i2 < hN2) && (i3 >= -hN3) && (i3 < hN3); + return (i1 >= -hN1) && (i1 <= hN1) && (i2 >= -hN2) && (i2 <= hN2) && (i3 >= -hN3) && (i3 <= hN3); } TriangleIterator operator+(const TriangleIterator& other_t) const { @@ -73,6 +50,23 @@ struct ModeSet t.i3 = (t.i3+other_t.i3); return t; } + + TriangleIterator& inp_array() { + if (i1 < 0) + i1 += N1; + if (i2 < 0) + i2 += N2; + if (i3 < 0) + i3 += N3; + return *this; + } + + TriangleIterator array() const { + TriangleIterator t = *this; + + t.inp_array(); + return t; + } TriangleIterator real() const { TriangleIterator t = *this; @@ -86,7 +80,7 @@ struct ModeSet } double norm() const { - double r1 = i1, r2 = i3, r3 = i3; + double r1 = i1, r2 = i2, r3 = i3; return std::sqrt(r1*r1 + r2*r2 + r3*r3); } void reverse() { i1=-i1; i2=-i2; i3=-i3; } @@ -94,23 +88,30 @@ struct ModeSet TriangleIterator& operator*() { return *this; } }; - ModeSet(size_t N1_, size_t N2_, size_t N3_, bool do_openmp = false) - : N1(N1_), N2(N2_), N3(N3_),omp(do_openmp) { + ModeSet(size_t N1_, size_t N2_, size_t N3_, bool _half_copy = false) + : N1(N1_), N2(N2_), N3(N3_),half_copy(_half_copy) { } TriangleIterator begin() const { TriangleIterator t; - t.i1 = t.i2 = t.i3 = 0; + t.i1 = -N1/2; + t.i2 = -N2/2; + if (half_copy) + t.first_iteration = t.i3 = 0; + else + t.first_iteration = t.i3 = -N3/2; + t.N1 = N1; t.N2 = N2; t.N3 = N3; - t.N1 = N1; return t; } TriangleIterator end() const { TriangleIterator t; - t.i2 = t.i3 = 0; - t.i1 = N1; + t.first_iteration = (half_copy ? 0 : (-N3/2)); + t.i3 = t.first_iteration; + t.i2 = -N2/2; + t.i1 = N1/2+1; t.N1 = N1; t.N2 = N2; t.N3 = N3; @@ -119,10 +120,50 @@ struct ModeSet }; +std::ostream& operator<<(std::ostream& o, const ModeSet::TriangleIterator& t) +{ + o << t.i1 << "," << t.i2 << "," << t.i3; + return o; +} + template static T no_conj(const T& a) { return a; } -extern "C" DLL_PUBLIC +template +static inline void accum_bispec(const Delta& delta_mirror, SubArrayB b_Nt, SubArrayCnt b_B, + const typename Delta::element& v1, const typename Delta::element& v2, + const ModeSet::TriangleIterator& rm1, + const ModeSet::TriangleIterator& rm2, + const ModeSet::TriangleIterator& rm3, + double delta_k, + size_t Nk) +{ + typedef std::complex CType; + + size_t q1 = std::floor(rm1.norm()/delta_k); + if (q1 >= Nk) + return; + + size_t q2 = std::floor(rm2.norm()/delta_k); + if (q2 >= Nk) + return; + + size_t q3 = std::floor(rm3.norm()/delta_k); + if (q3 >= Nk) + return; + + CType prod = v1*v2; + ModeSet::TriangleIterator m3 = rm3; + // We use hermitic symmetry to get -m3, it is just the mode in m3 but conjugated. + m3.reverse(); + m3.inp_array(); + prod *= delta_mirror[m3.i1][m3.i2][m3.i3]; + + b_Nt[q1][q2][q3] ++; + b_B[q1][q2][q3] += prod; +} + +extern "C" CTOOL_DLL_PUBLIC void CosmoTool_compute_bispectrum( double *delta_hat, size_t Nx, size_t Ny, size_t Nz, size_t *Ntriangles, @@ -137,71 +178,68 @@ void CosmoTool_compute_bispectrum( boost::multi_array, 4> b_B(boost::extents[Ntasks][Nk][Nk][Nk]); boost::multi_array b_Nt(boost::extents[Ntasks][Nk][Nk][Nk]); typedef std::complex CType; + boost::multi_array, 3> delta_mirror(boost::extents[Nx][Ny][Nz]); + + // Add hermiticity + for (auto m : ModeSet(Nx, Ny, Nz, true)) { + auto n1 = m; + auto n2 = m.array(); + n1.reverse(); + n1.inp_array(); + delta_mirror[n2.i1][n2.i2][n2.i3] = (a_delta[n2.i1][n2.i2][n2.i3]); + delta_mirror[n1.i1][n1.i2][n1.i3] = std::conj(delta_mirror[n2.i1][n2.i2][n2.i3]); + } // First loop over m1 #pragma omp parallel -{ + { #pragma omp single -{ - for (auto m1 : ModeSet(Nx, Ny, Nz, true)) { -int tid = omp_get_thread_num(); + { + for (auto m1 : ModeSet(Nx, Ny, Nz)) { + auto am1 = m1.array(); + CType v1 = delta_mirror[am1.i1][am1.i2][am1.i3]; + int tid = omp_get_thread_num(); + #pragma omp task -{ - CType v1 = (m1.i3 >= kNz) ? std::conj(a_delta[m1.i1][m1.i2][(m1.N3-m1.i3)%m1.N3]) : a_delta[m1.i1][m1.i2][m1.i3]; - auto rm1 = m1.real(); - // Second mode m2 - for (auto m2 : ModeSet(Nx, Ny, Nz)) { - // Now derive m3 - CType v2 = (m2.i3 >= kNz) ? std::conj(a_delta[m2.i1][m2.i2][(m2.N3-m2.i3)%m2.N3]) : a_delta[m2.i1][m2.i2][m2.i3]; - auto rm2 = m2.real(); - auto m3 = (rm1+rm2); - - if (!m3.in_box()) - continue; + { + auto rm1 = m1.real(); + // Second mode m2 + for (auto m2 : ModeSet(Nx, Ny, Nz)) { + // Now derive m3 + auto am2 = m2.array(); + auto m3 = (m1+m2); + CType v2 = delta_mirror[am2.i1][am2.i2][am2.i3]; - size_t q1 = std::floor(rm1.norm()/delta_k); - size_t q2 = std::floor(rm2.norm()/delta_k); - size_t q3 = std::floor(m3.norm()/delta_k); + // Not in Fourier box, stop here + if (!m3.in_box()) + continue; - if (q1 >= Nk || q2 >= Nk || q3 >= Nk) - continue; + accum_bispec(delta_mirror, b_Nt[tid], b_B[tid], v1, v2, m1, m2, m3, delta_k, Nk); - CType prod = v1*v2; - bool do_conj = false; - // We use hermitic symmetry to get -m3, it is just the mode in m3 but conjugated. - m3.reverse(); - if (m3.i3 > 0) { - assert(m3.i3 < kNz); - } else { - m3.i3 = -m3.i3; - do_conj = !do_conj; } - m3.i1 = (m3.N1 - m1.i1)%m3.N1; - m3.i2 = (m3.N2 - m3.i2)%m3.N2; - - if (do_conj) - prod *= std::conj(a_delta[m3.i1][m3.i2][m3.i3]); - else - prod *= (a_delta[m3.i1][m3.i2][m3.i3]); - - b_Nt[tid][q1][q2][q3] ++; - b_B[tid][q1][q2][q3] += prod; + } + } + } } -} -} -} - - for (int tid = 0; tid < Ntasks; tid++) - for (auto m1 : ModeSet(Nk, Nk, Nk)) { - a_Nt[m1.i1][m1.i2][m1.i3] += b_Nt[tid][m1.i1][m1.i2][m1.i3]; - a_B[m1.i1][m1.i2][m1.i3] += b_B[tid][m1.i1][m1.i2][m1.i3]; - } +#pragma omp taskwait + for (int tid = 0; tid < Ntasks; tid++) { + size_t *b_p = b_Nt[tid].origin(); + size_t *a_p = a_Nt.data(); + std::complex *b_B_p = b_B[tid].origin(); + std::complex *a_B_p = a_B.origin(); +//#pragma omp simd +#pragma omp parallel for + for (size_t q = 0; q < Nk*Nk*Nk; q++) { + a_p[q] += b_p[q]; + a_B_p[q] += b_B_p[q]; + } + } } -extern "C" DLL_PUBLIC +extern "C" CTOOL_DLL_PUBLIC void CosmoTool_compute_powerspectrum( double *delta_hat, size_t Nx, size_t Ny, size_t Nz, size_t *Ncounts, @@ -215,15 +253,16 @@ void CosmoTool_compute_powerspectrum( typedef std::complex CType; // First loop over m1 - for (auto m1 : ModeSet(Nx, Ny, kNz)) { + for (auto m : ModeSet(Nx, Ny, kNz)) { + auto m1 = m.array(); CType& v1 = a_delta[m1.i1][m1.i2][m1.i3]; - size_t q1 = std::floor(m1.norm()/delta_k); + size_t q1 = std::floor(m.norm()/delta_k); if (q1 >= Nk) continue; a_Nc[q1] ++; - a_P[q1] += std::norm(v1); + a_P[q1] += std::norm(v1); } } diff --git a/python/cosmotool/__init__.py b/python/cosmotool/__init__.py index 7e76e2c..63a339e 100644 --- a/python/cosmotool/__init__.py +++ b/python/cosmotool/__init__.py @@ -1,8 +1,8 @@ -from _cosmotool import * -from _project import * -from _cosmo_power import * -from _cosmo_cic import * -from _fast_interp import * +from ._cosmotool import * +from ._project import * +from ._cosmo_power import * +from ._cosmo_cic import * +from ._fast_interp import * from .grafic import writeGrafic, writeWhitePhase, readGrafic, readWhitePhase from .borg import read_borg_vol from .cic import cicParticles diff --git a/python/cosmotool/bispectrum.py b/python/cosmotool/bispectrum.py index 3b4bd90..6247f66 100644 --- a/python/cosmotool/bispectrum.py +++ b/python/cosmotool/bispectrum.py @@ -124,4 +124,4 @@ if __name__=="__main__": delta[3,2,1]=1 b = powerspectrum(delta, 1, 16, fourier=False) a = bispectrum(delta, 1, 16, fourier=False) - print a[0].max() + print(a[0].max()) diff --git a/python/cosmotool/borg.py b/python/cosmotool/borg.py index 2deac20..fe10e2a 100644 --- a/python/cosmotool/borg.py +++ b/python/cosmotool/borg.py @@ -23,9 +23,8 @@ def build_filelist(fdir): fdir=fdir[1:] #eliminate first element for fd in fdir: - fname_0=fname_0+glob.glob(fd+'initial_density_*') - fname_1=fname_1+glob.glob(fd+'final_density_*') - + fname_0=fname_0+glob.glob(fd+'initial_density_*') + fname_1=fname_1+glob.glob(fd+'final_density_*') return fname_0, fname_1 @@ -56,21 +55,22 @@ def read_borg_vol(BORGFILE): r=s.rsplit(' ') if size(r)==5 : - if r[0] =="define": - if r[1]=="Lattice" : N0=int(r[2]) - N1=int(r[3]) - N2=int(r[4]) - - - if size(r)==11 : - if r[4] =="BoundingBox": xmin=float(r[5]) - xmax=float(r[6]) - ymin=float(r[7]) - ymax=float(r[8]) - zmin=float(r[9]) - zmax=float(r[10].rstrip(',')) - - if r[0]=='@1': break + if r[0] =="define": + if r[1]=="Lattice" : + N0=int(r[2]) + N1=int(r[3]) + N2=int(r[4]) + + if size(r)==11 : + if r[4] =="BoundingBox": + xmin=float(r[5]) + xmax=float(r[6]) + ymin=float(r[7]) + ymax=float(r[8]) + zmin=float(r[9]) + zmax=float(r[10].rstrip(',')) + + if r[0]=='@1': break ranges=[] ranges.append(xmin) diff --git a/python_sample/test_bispectrum.py b/python_sample/test_bispectrum.py index 2d509f0..0e0162d 100644 --- a/python_sample/test_bispectrum.py +++ b/python_sample/test_bispectrum.py @@ -3,23 +3,26 @@ import numpy as np import cosmotool as ct def myfun(N): - f=0.10 - d=np.random.normal(size=(N,)*3) + f=0.001 + L = 1.0 + d=np.random.normal(size=(N,)*3) * np.sqrt(float(N))**3 / L**3 rho = d + f *(d*d - np.average(d*d)) + delta = (L/N)**3 - B = ct.bispectrum(rho, 1, N, fourier=False) - P = ct.powerspectrum(rho, 1, N, fourier=False) - PP = P[1]/P[0]/N**3 + B = ct.bispectrum(rho * delta, 1, N, fourier=False) + P = ct.powerspectrum(rho * delta, 1, N, fourier=False) + PP = P[1]/P[0] / L**3 x = PP[:,None,None] * PP[None,:,None] + PP[:,None,None]*PP[None,None,:] + PP[None,:,None]*PP[None,None,:] - BB = B[1]/B[0]/N**6 - + BB = B[1]/B[0] / L**3 + y = BB/x - np.savez("bispec_%d.npz" % N, y=y, B_nt=B[0], B_r=B[1], P_n=P[0], P=P[1], rho=rho); + np.savez("bispec_%d.npz" % N, x=x, y=y, d=d,B_nt=B[0], B_r=B[1], P_n=P[0], P=P[1], BB=BB, rho=rho, PP=PP); -print( timeit.timeit('from __main__ import myfun; myfun(16)', number=1) ) +#print( timeit.timeit('from __main__ import myfun; myfun(16)', number=1) ) +#print( timeit.timeit('from __main__ import myfun; myfun(24)', number=1) ) print( timeit.timeit('from __main__ import myfun; myfun(32)', number=1) ) -print( timeit.timeit('from __main__ import myfun; myfun(64)', number=1) ) +#print( timeit.timeit('from __main__ import myfun; myfun(64)', number=1) )